1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) { fputs(content, file); } 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijkokkos; 133 134 PetscFunctionBeginUser; 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 136 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 137 if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA)); 138 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 139 PetscCall(VecDuplicate(b, &u)); 140 if (time) *time = 0.0; 141 for (i = 0; i < repetitions; i++) { 142 if (use_gpu) { 143 PetscCall(MatDestroy(&A2)); 144 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 145 PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2)); 146 } else A2 = A; 147 /* Timing MatMult */ 148 if (time) PetscCall(PetscTime(&vstart)); 149 150 PetscCall(MatMult(A2, b, u)); 151 152 if (time) { 153 PetscCall(PetscTime(&vend)); 154 *time += (PetscReal)(vend - vstart); 155 } 156 } 157 PetscCall(VecDestroy(&u)); 158 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 159 PetscFunctionReturn(PETSC_SUCCESS); 160 } 161 162 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 163 { 164 PetscLogEvent event; 165 PetscEventPerfInfo eventInfo; 166 //PetscReal gpuflopRate; 167 168 // if (matformat) { 169 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 170 // } else { 171 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 172 // } 173 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 174 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 175 176 PetscFunctionBeginUser; 177 PetscCall(PetscLogEventGetId("MatMult", &event)); 178 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 179 //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 180 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 181 if (cputime) *cputime = eventInfo.time; 182 #if defined(PETSC_HAVE_DEVICE) 183 if (gputime) *gputime = eventInfo.GpuTime; 184 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 185 #endif 186 PetscFunctionReturn(PETSC_SUCCESS); 187 } 188 189 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 190 { 191 PetscBool iscsr, issell, iscsrkokkos; 192 193 PetscFunctionBeginUser; 194 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 195 if (iscsr) { 196 if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 197 else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 198 } else { 199 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 200 if (issell) { 201 if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA 202 else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 203 } else { 204 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 205 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 206 } 207 } 208 PetscFunctionReturn(PETSC_SUCCESS); 209 } 210 211 int main(int argc, char **args) 212 { 213 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 214 Mat A; 215 Vec b; 216 char jfilename[PETSC_MAX_PATH_LEN]; 217 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 218 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 219 char *matformats[5]; 220 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 221 char ordering[256] = MATORDERINGRCM; 222 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 223 IS rowperm = NULL, colperm = NULL; 224 PetscViewer fd; 225 PetscReal starting_spmv_time = 0, *spmv_times; 226 227 PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 228 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 229 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 230 if (!flg1) { 231 nformats = 1; 232 PetscCall(PetscStrallocpy("csr", &matformats[0])); 233 } 234 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 235 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 236 /* Read matrix and RHS */ 237 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 238 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 239 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 240 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 241 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 242 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 243 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 244 PetscOptionsEnd(); 245 #if !defined(PETSC_HAVE_DEVICE) 246 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 247 #endif 248 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 249 if (flg3) { 250 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 251 PetscCall(PetscCalloc1(nmat, &spmv_times)); 252 } else if (flg2) { 253 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 254 } else if (flg1) { 255 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 256 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 257 PetscCall(MatSetType(A, MATAIJ)); 258 PetscCall(MatSetFromOptions(A)); 259 PetscCall(MatLoad(A, fd)); 260 PetscCall(PetscViewerDestroy(&fd)); 261 } 262 if (permute) { 263 Mat Aperm; 264 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 265 PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 266 PetscCall(MatDestroy(&A)); 267 A = Aperm; /* Replace original operator with permuted version */ 268 } 269 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 270 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 271 if (bflg) { 272 PetscViewer fb; 273 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 274 PetscCall(VecSetFromOptions(b)); 275 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 276 PetscCall(VecLoad(b, fb)); 277 PetscCall(PetscViewerDestroy(&fb)); 278 } 279 280 for (j = 0; j < nformats; j++) { 281 char *petscmatformat = NULL; 282 PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat)); 283 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 284 if (flg3) { // mat names specified in a JSON file 285 for (i = 0; i < nmat; i++) { 286 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 287 if (!bflg) { 288 PetscCall(MatCreateVecs(A, &b, NULL)); 289 PetscCall(VecSet(b, 1.0)); 290 } 291 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 292 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 293 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 294 PetscCall(MatDestroy(&A)); 295 if (!bflg) PetscCall(VecDestroy(&b)); 296 } 297 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 298 starting_spmv_time = spmv_times[nmat - 1]; 299 } else { 300 PetscReal spmv_time; 301 if (!bflg) { 302 PetscCall(MatCreateVecs(A, &b, NULL)); 303 PetscCall(VecSet(b, 1.0)); 304 } 305 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 306 if (!bflg) PetscCall(VecDestroy(&b)); 307 } 308 PetscCall(PetscFree(petscmatformat)); 309 } 310 if (flg3) { 311 for (i = 0; i < nmat; i++) { 312 free(filenames[i]); 313 free(groupnames[i]); 314 free(matnames[i]); 315 } 316 free(filenames); 317 free(groupnames); 318 free(matnames); 319 PetscCall(PetscFree(spmv_times)); 320 } 321 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 322 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 323 if (bflg) PetscCall(VecDestroy(&b)); 324 PetscCall(ISDestroy(&rowperm)); 325 PetscCall(ISDestroy(&colperm)); 326 PetscCall(PetscFinalize()); 327 return 0; 328 } 329 /*TEST 330 331 build: 332 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 333 depends: mmloader.c mmio.c cJSON.c 334 335 test: 336 suffix: 1 337 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 338 339 test: 340 suffix: 2 341 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 342 output_file: output/bench_spmv_1.out 343 requires: cuda 344 345 TEST*/ 346