1 static char help[] = "Driver for benchmarking SpMV.";
2
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6
read_file(const char * filename)7 char *read_file(const char *filename)
8 {
9 FILE *file = NULL;
10 long length = 0;
11 char *content = NULL;
12 size_t read_chars = 0;
13
14 /* open in read binary mode */
15 file = fopen(filename, "rb");
16 if (file) {
17 /* get the length */
18 fseek(file, 0, SEEK_END);
19 length = ftell(file);
20 fseek(file, 0, SEEK_SET);
21 /* allocate content buffer */
22 content = (char *)malloc((size_t)length + sizeof(""));
23 /* read the file into memory */
24 read_chars = fread(content, sizeof(char), (size_t)length, file);
25 content[read_chars] = '\0';
26 fclose(file);
27 }
28 return content;
29 }
30
write_file(const char * filename,const char * content)31 void write_file(const char *filename, const char *content)
32 {
33 FILE *file = NULL;
34 file = fopen(filename, "w");
35 if (file) fputs(content, file);
36 fclose(file);
37 }
38
ParseJSON(const char * const inputjsonfile,char *** outputfilenames,char *** outputgroupnames,char *** outputmatnames,int * nmat)39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41 char *content = read_file(inputjsonfile);
42 cJSON *matrix_json = NULL;
43 const cJSON *problem = NULL, *elem = NULL;
44 const cJSON *item = NULL;
45 char **filenames, **groupnames, **matnames;
46 int i, n;
47 if (!content) return 0;
48 matrix_json = cJSON_Parse(content);
49 if (!matrix_json) return 0;
50 n = cJSON_GetArraySize(matrix_json);
51 *nmat = n;
52 filenames = (char **)malloc(sizeof(char *) * n);
53 groupnames = (char **)malloc(sizeof(char *) * n);
54 matnames = (char **)malloc(sizeof(char *) * n);
55 for (i = 0; i < n; i++) {
56 elem = cJSON_GetArrayItem(matrix_json, i);
57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59 strcpy(filenames[i], item->valuestring);
60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61 item = cJSON_GetObjectItemCaseSensitive(problem, "group");
62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63 strcpy(groupnames[i], item->valuestring);
64 item = cJSON_GetObjectItemCaseSensitive(problem, "name");
65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66 strcpy(matnames[i], item->valuestring);
67 }
68 cJSON_Delete(matrix_json);
69 free(content);
70 *outputfilenames = filenames;
71 *outputgroupnames = groupnames;
72 *outputmatnames = matnames;
73 return 0;
74 }
75
UpdateJSON(const char * const inputjsonfile,PetscReal * spmv_times,PetscReal starting_spmv_time,const char * const matformat,PetscBool use_gpu,PetscInt repetitions)76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78 char *content = read_file(inputjsonfile);
79 cJSON *matrix_json = NULL;
80 cJSON *elem = NULL;
81 int i, n;
82 if (!content) return 0;
83 matrix_json = cJSON_Parse(content);
84 if (!matrix_json) return 0;
85 n = cJSON_GetArraySize(matrix_json);
86 for (i = 0; i < n; i++) {
87 cJSON *spmv = NULL;
88 cJSON *format = NULL;
89 elem = cJSON_GetArrayItem(matrix_json, i);
90 spmv = cJSON_GetObjectItem(elem, "spmv");
91 if (spmv) {
92 format = cJSON_GetObjectItem(spmv, matformat);
93 if (format) {
94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96 } else {
97 format = cJSON_CreateObject();
98 cJSON_AddItemToObject(spmv, matformat, format);
99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100 cJSON_AddNumberToObject(format, "repetitions", repetitions);
101 }
102 } else {
103 spmv = cJSON_CreateObject();
104 cJSON_AddItemToObject(elem, "spmv", spmv);
105 format = cJSON_CreateObject();
106 cJSON_AddItemToObject(spmv, matformat, format);
107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108 cJSON_AddNumberToObject(format, "repetitions", repetitions);
109 }
110 }
111 free(content);
112 content = cJSON_Print(matrix_json);
113 write_file(inputjsonfile, content);
114 cJSON_Delete(matrix_json);
115 free(content);
116 return 0;
117 }
118
119 /*
120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121 The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
122 in each repetition. As a result, each MatMult call is fresh, and we can capture
123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
124 during consecutive calls.
125 */
TimedSpMV(Mat A,Vec b,PetscReal * time,const char * petscmatformat,PetscBool use_gpu,PetscInt repetitions)126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127 {
128 Mat A2 = NULL;
129 PetscInt i;
130 Vec u;
131 PetscLogDouble vstart = 0, vend = 0;
132 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
133
134 PetscFunctionBeginUser;
135 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
137 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
138 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
139 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
140 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
141 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
142 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
143 PetscCall(VecDuplicate(b, &u));
144 if (time) *time = 0.0;
145 for (i = 0; i < repetitions; i++) {
146 if (use_gpu) {
147 PetscCall(MatDestroy(&A2));
148 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
149 PetscCall(MatSetType(A2, petscmatformat));
150 PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
151 } else A2 = A;
152 /* Timing MatMult */
153 if (time) PetscCall(PetscTime(&vstart));
154
155 PetscCall(MatMult(A2, b, u));
156
157 if (time) {
158 PetscCall(PetscTime(&vend));
159 *time += (PetscReal)(vend - vstart);
160 }
161 }
162 PetscCall(VecDestroy(&u));
163 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
164 PetscFunctionReturn(PETSC_SUCCESS);
165 }
166
WarmUpDevice(Mat A,Vec b,const char * petscmatformat)167 PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
168 {
169 Mat A2 = NULL;
170 PetscLogEvent event;
171 Vec u;
172 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
173
174 PetscFunctionBeginUser;
175 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
176 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
177 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
178 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
179 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
180 if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
181 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
182 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
183 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
184 PetscCall(VecDuplicate(b, &u));
185 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
186 PetscCall(MatSetType(A2, petscmatformat));
187 PetscCall(PetscLogEventGetId("MatMult", &event));
188 PetscCall(PetscLogEventDeactivatePush(event));
189 PetscCall(MatMult(A2, b, u));
190 PetscCall(PetscLogEventDeactivatePop(event));
191 PetscCall(VecDestroy(&u));
192 PetscCall(MatDestroy(&A2));
193 PetscFunctionReturn(PETSC_SUCCESS);
194 }
195
PetscLogSpMVTime(PetscReal * gputime,PetscReal * cputime,PetscReal * gpuflops,const char * petscmatformat)196 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
197 {
198 PetscLogEvent event;
199 PetscEventPerfInfo eventInfo;
200 // PetscReal gpuflopRate;
201
202 // if (matformat) {
203 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
204 // } else {
205 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
206 // }
207 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
208 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
209
210 PetscFunctionBeginUser;
211 PetscCall(PetscLogEventGetId("MatMult", &event));
212 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
213 // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
214 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
215 if (cputime) *cputime = eventInfo.time;
216 #if defined(PETSC_HAVE_DEVICE)
217 if (gputime) *gputime = eventInfo.GpuTime;
218 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
219 #endif
220 PetscFunctionReturn(PETSC_SUCCESS);
221 }
222
MapToPetscMatType(const char * matformat,PetscBool use_gpu,char ** petscmatformat)223 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
224 {
225 PetscBool iscsr, issell, iscsrkokkos;
226
227 PetscFunctionBeginUser;
228 PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
229 if (iscsr) {
230 if (use_gpu) {
231 #if defined(PETSC_HAVE_CUDA)
232 PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
233 #endif
234 #if defined(PETSC_HAVE_HIP)
235 PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
236 #endif
237 } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
238 } else {
239 PetscCall(PetscStrcmp(matformat, "sell", &issell));
240 if (issell) {
241 if (use_gpu) {
242 #if defined(PETSC_HAVE_CUDA)
243 PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
244 #endif
245 #if defined(PETSC_HAVE_HIP)
246 PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
247 #endif
248 } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
249 } else {
250 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
251 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
252 }
253 }
254 PetscFunctionReturn(PETSC_SUCCESS);
255 }
256
main(int argc,char ** args)257 int main(int argc, char **args)
258 {
259 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1;
260 Mat A;
261 Vec b;
262 char jfilename[PETSC_MAX_PATH_LEN];
263 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
264 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
265 char *matformats[5];
266 char **filenames = NULL, **groupnames = NULL, **matnames = NULL;
267 char ordering[256] = MATORDERINGRCM;
268 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
269 IS rowperm = NULL, colperm = NULL;
270 PetscViewer fd;
271 PetscReal starting_spmv_time = 0, *spmv_times;
272
273 PetscCall(PetscOptionsInsertString(NULL, "-log_view :/dev/null"));
274 if (PetscDefined(HAVE_DEVICE)) PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time"));
275 PetscCall(PetscInitialize(&argc, &args, NULL, help));
276 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
277 if (!flg1) {
278 nformats = 1;
279 PetscCall(PetscStrallocpy("csr", &matformats[0]));
280 }
281 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
282 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
283 /* Read matrix and RHS */
284 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
285 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
286 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
287 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
288 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
289 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
290 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
291 PetscOptionsEnd();
292 #if !defined(PETSC_HAVE_DEVICE)
293 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
294 #endif
295 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
296 if (flg3) {
297 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
298 PetscCall(PetscCalloc1(nmat, &spmv_times));
299 } else if (flg2) {
300 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
301 } else if (flg1) {
302 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
303 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
304 PetscCall(MatSetType(A, MATAIJ));
305 PetscCall(MatSetFromOptions(A));
306 PetscCall(MatLoad(A, fd));
307 PetscCall(PetscViewerDestroy(&fd));
308 }
309 if (permute) {
310 Mat Aperm;
311 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
312 PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
313 PetscCall(MatDestroy(&A));
314 A = Aperm; /* Replace original operator with permuted version */
315 }
316 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
317 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
318 if (bflg) {
319 PetscViewer fb;
320 PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
321 PetscCall(VecSetFromOptions(b));
322 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
323 PetscCall(VecLoad(b, fb));
324 PetscCall(PetscViewerDestroy(&fb));
325 }
326
327 for (j = 0; j < nformats; j++) {
328 char *petscmatformat = NULL;
329 PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
330 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
331 if (flg3) { // mat names specified in a JSON file
332 for (i = 0; i < nmat; i++) {
333 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
334 if (!bflg) {
335 PetscCall(MatCreateVecs(A, &b, NULL));
336 PetscCall(VecSet(b, 1.0));
337 }
338 if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
339 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
340 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
341 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
342 PetscCall(MatDestroy(&A));
343 if (!bflg) PetscCall(VecDestroy(&b));
344 }
345 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
346 starting_spmv_time = spmv_times[nmat - 1];
347 } else {
348 PetscReal spmv_time;
349 if (!bflg) {
350 PetscCall(MatCreateVecs(A, &b, NULL));
351 PetscCall(VecSet(b, 1.0));
352 }
353 if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
354 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
355 if (!bflg) PetscCall(VecDestroy(&b));
356 }
357 PetscCall(PetscFree(petscmatformat));
358 }
359 if (flg3) {
360 for (i = 0; i < nmat; i++) {
361 free(filenames[i]);
362 free(groupnames[i]);
363 free(matnames[i]);
364 }
365 free(filenames);
366 free(groupnames);
367 free(matnames);
368 PetscCall(PetscFree(spmv_times));
369 }
370 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
371 if (flg1 || flg2) PetscCall(MatDestroy(&A));
372 if (bflg) PetscCall(VecDestroy(&b));
373 PetscCall(ISDestroy(&rowperm));
374 PetscCall(ISDestroy(&colperm));
375 PetscCall(PetscFinalize());
376 return 0;
377 }
378 /*TEST
379
380 build:
381 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
382 depends: mmloader.c mmio.c cJSON.c
383
384 test:
385 suffix: 1
386 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
387 output_file: output/empty.out
388
389 test:
390 suffix: 2
391 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
392 output_file: output/empty.out
393 requires: cuda
394
395 test:
396 suffix: 3
397 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
398 output_file: output/empty.out
399 requires: hip
400
401 TEST*/
402