1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) { fputs(content, file); } 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijkokkos, issellcuda; 133 134 PetscFunctionBeginUser; 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 136 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 137 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 138 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 139 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 140 PetscCall(VecDuplicate(b, &u)); 141 if (time) *time = 0.0; 142 for (i = 0; i < repetitions; i++) { 143 if (use_gpu) { 144 PetscCall(MatDestroy(&A2)); 145 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 146 if (issellcuda) { 147 PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2)); 148 PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2)); 149 } else { 150 PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2)); 151 } 152 } else A2 = A; 153 /* Timing MatMult */ 154 if (time) PetscCall(PetscTime(&vstart)); 155 156 PetscCall(MatMult(A2, b, u)); 157 158 if (time) { 159 PetscCall(PetscTime(&vend)); 160 *time += (PetscReal)(vend - vstart); 161 } 162 } 163 PetscCall(VecDestroy(&u)); 164 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 165 PetscFunctionReturn(PETSC_SUCCESS); 166 } 167 168 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 169 { 170 PetscLogEvent event; 171 PetscEventPerfInfo eventInfo; 172 //PetscReal gpuflopRate; 173 174 // if (matformat) { 175 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 176 // } else { 177 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 178 // } 179 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 180 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 181 182 PetscFunctionBeginUser; 183 PetscCall(PetscLogEventGetId("MatMult", &event)); 184 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 185 //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 186 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 187 if (cputime) *cputime = eventInfo.time; 188 #if defined(PETSC_HAVE_DEVICE) 189 if (gputime) *gputime = eventInfo.GpuTime; 190 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 191 #endif 192 PetscFunctionReturn(PETSC_SUCCESS); 193 } 194 195 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 196 { 197 PetscBool iscsr, issell, iscsrkokkos; 198 199 PetscFunctionBeginUser; 200 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 201 if (iscsr) { 202 if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 203 else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 204 } else { 205 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 206 if (issell) { 207 if (use_gpu) PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat)); 208 else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 209 } else { 210 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 211 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 212 } 213 } 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 int main(int argc, char **args) 218 { 219 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 220 Mat A; 221 Vec b; 222 char jfilename[PETSC_MAX_PATH_LEN]; 223 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 224 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 225 char *matformats[5]; 226 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 227 char ordering[256] = MATORDERINGRCM; 228 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 229 IS rowperm = NULL, colperm = NULL; 230 PetscViewer fd; 231 PetscReal starting_spmv_time = 0, *spmv_times; 232 233 PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 234 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 235 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 236 if (!flg1) { 237 nformats = 1; 238 PetscCall(PetscStrallocpy("csr", &matformats[0])); 239 } 240 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 241 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 242 /* Read matrix and RHS */ 243 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 244 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 245 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 246 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 247 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 248 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 249 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 250 PetscOptionsEnd(); 251 #if !defined(PETSC_HAVE_DEVICE) 252 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 253 #endif 254 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 255 if (flg3) { 256 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 257 PetscCall(PetscCalloc1(nmat, &spmv_times)); 258 } else if (flg2) { 259 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 260 } else if (flg1) { 261 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 262 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 263 PetscCall(MatSetType(A, MATAIJ)); 264 PetscCall(MatSetFromOptions(A)); 265 PetscCall(MatLoad(A, fd)); 266 PetscCall(PetscViewerDestroy(&fd)); 267 } 268 if (permute) { 269 Mat Aperm; 270 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 271 PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 272 PetscCall(MatDestroy(&A)); 273 A = Aperm; /* Replace original operator with permuted version */ 274 } 275 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 276 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 277 if (bflg) { 278 PetscViewer fb; 279 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 280 PetscCall(VecSetFromOptions(b)); 281 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 282 PetscCall(VecLoad(b, fb)); 283 PetscCall(PetscViewerDestroy(&fb)); 284 } 285 286 for (j = 0; j < nformats; j++) { 287 char *petscmatformat = NULL; 288 PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat)); 289 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 290 if (flg3) { // mat names specified in a JSON file 291 for (i = 0; i < nmat; i++) { 292 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 293 if (!bflg) { 294 PetscCall(MatCreateVecs(A, &b, NULL)); 295 PetscCall(VecSet(b, 1.0)); 296 } 297 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 298 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 299 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 300 PetscCall(MatDestroy(&A)); 301 if (!bflg) PetscCall(VecDestroy(&b)); 302 } 303 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 304 starting_spmv_time = spmv_times[nmat - 1]; 305 } else { 306 PetscReal spmv_time; 307 if (!bflg) { 308 PetscCall(MatCreateVecs(A, &b, NULL)); 309 PetscCall(VecSet(b, 1.0)); 310 } 311 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 312 if (!bflg) PetscCall(VecDestroy(&b)); 313 } 314 PetscCall(PetscFree(petscmatformat)); 315 } 316 if (flg3) { 317 for (i = 0; i < nmat; i++) { 318 free(filenames[i]); 319 free(groupnames[i]); 320 free(matnames[i]); 321 } 322 free(filenames); 323 free(groupnames); 324 free(matnames); 325 PetscCall(PetscFree(spmv_times)); 326 } 327 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 328 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 329 if (bflg) PetscCall(VecDestroy(&b)); 330 PetscCall(ISDestroy(&rowperm)); 331 PetscCall(ISDestroy(&colperm)); 332 PetscCall(PetscFinalize()); 333 return 0; 334 } 335 /*TEST 336 337 build: 338 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 339 depends: mmloader.c mmio.c cJSON.c 340 341 test: 342 suffix: 1 343 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 344 345 test: 346 suffix: 2 347 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 348 output_file: output/bench_spmv_1.out 349 requires: cuda 350 351 TEST*/ 352