xref: /petsc/src/mat/tests/bench_spmv.c (revision e0b7e82fd3cf27fce84cc3e37e8d70a5c36a2d4e)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read an entire file into a freshly malloc'ed, NUL-terminated buffer.

  filename - path of the file to read (opened in binary mode)

  Returns the buffer, which the caller must release with free(), or NULL when
  the file cannot be opened, its size cannot be determined, or memory cannot
  be allocated. If fewer bytes than the reported size are read, the buffer is
  terminated right after the bytes actually read.
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  if (!filename) return NULL;
  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (!file) return NULL;

  /* get the length; ftell() reports -1 on failure, which must never be used as a size */
  if (fseek(file, 0, SEEK_END) != 0) goto done;
  length = ftell(file);
  if (length < 0) goto done;
  if (fseek(file, 0, SEEK_SET) != 0) goto done;

  /* allocate content buffer, with room for the terminating NUL */
  content = (char *)malloc((size_t)length + 1);
  if (!content) goto done;

  /* read the file into memory */
  read_chars          = fread(content, sizeof(char), (size_t)length, file);
  content[read_chars] = '\0';

done:
  fclose(file);
  return content;
}
30 
/*
  Overwrite a file with the given NUL-terminated text.

  filename - path of the file to create or truncate
  content  - text to write

  Nothing is written when either argument is NULL. Failures to open, write or
  close the file are reported on stderr; the function itself returns nothing.
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = NULL;

  if (!filename || !content) return;
  file = fopen(filename, "w");
  if (!file) {
    /* fclose(NULL) is undefined behaviour, so bail out before reaching it */
    fprintf(stderr, "write_file: cannot open %s for writing\n", filename);
    return;
  }
  if (fputs(content, file) == EOF) fprintf(stderr, "write_file: error while writing %s\n", filename);
  /* fclose() flushes buffered data, so its result matters for a write stream */
  if (fclose(file) != 0) fprintf(stderr, "write_file: error while closing %s\n", filename);
}
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
119 /*
120   For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121   The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
122   in each repetition. As a result,  each MatMult call is fresh, and we can capture
123   the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
124   during consecutive calls.
125 */
126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127 {
128   Mat            A2 = NULL;
129   PetscInt       i;
130   Vec            u;
131   PetscLogDouble vstart = 0, vend = 0;
132   PetscBool      isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
133 
134   PetscFunctionBeginUser;
135   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
137   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
138   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
139   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
140   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
141   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
142   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
143   PetscCall(VecDuplicate(b, &u));
144   if (time) *time = 0.0;
145   for (i = 0; i < repetitions; i++) {
146     if (use_gpu) {
147       PetscCall(MatDestroy(&A2));
148       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
149       PetscCall(MatSetType(A2, petscmatformat));
150       PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
151     } else A2 = A;
152     /* Timing MatMult */
153     if (time) PetscCall(PetscTime(&vstart));
154 
155     PetscCall(MatMult(A2, b, u));
156 
157     if (time) {
158       PetscCall(PetscTime(&vend));
159       *time += (PetscReal)(vend - vstart);
160     }
161   }
162   PetscCall(VecDestroy(&u));
163   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
164   PetscFunctionReturn(PETSC_SUCCESS);
165 }
166 
167 PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
168 {
169   Mat           A2 = NULL;
170   PetscLogEvent event;
171   Vec           u;
172   PetscBool     isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
173 
174   PetscFunctionBeginUser;
175   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
176   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
177   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
178   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
179   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
180   if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
181   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
182   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
183   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
184   PetscCall(VecDuplicate(b, &u));
185   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
186   PetscCall(MatSetType(A2, petscmatformat));
187   PetscCall(PetscLogEventGetId("MatMult", &event));
188   PetscCall(PetscLogEventDeactivatePush(event));
189   PetscCall(MatMult(A2, b, u));
190   PetscCall(PetscLogEventDeactivatePop(event));
191   PetscCall(VecDestroy(&u));
192   PetscCall(MatDestroy(&A2));
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
196 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
197 {
198   PetscLogEvent      event;
199   PetscEventPerfInfo eventInfo;
200   // PetscReal          gpuflopRate;
201 
202   // if (matformat) {
203   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
204   // } else {
205   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
206   // }
207   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
208   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
209 
210   PetscFunctionBeginUser;
211   PetscCall(PetscLogEventGetId("MatMult", &event));
212   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
213   // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
214   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
215   if (cputime) *cputime = eventInfo.time;
216 #if defined(PETSC_HAVE_DEVICE)
217   if (gputime) *gputime = eventInfo.GpuTime;
218   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
219 #endif
220   PetscFunctionReturn(PETSC_SUCCESS);
221 }
222 
223 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
224 {
225   PetscBool iscsr, issell, iscsrkokkos;
226 
227   PetscFunctionBeginUser;
228   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
229   if (iscsr) {
230     if (use_gpu) {
231 #if defined(PETSC_HAVE_CUDA)
232       PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
233 #endif
234 #if defined(PETSC_HAVE_HIP)
235       PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
236 #endif
237     } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
238   } else {
239     PetscCall(PetscStrcmp(matformat, "sell", &issell));
240     if (issell) {
241       if (use_gpu) {
242 #if defined(PETSC_HAVE_CUDA)
243         PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
244 #endif
245 #if defined(PETSC_HAVE_HIP)
246         PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
247 #endif
248       } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
249     } else {
250       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
251       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
252     }
253   }
254   PetscFunctionReturn(PETSC_SUCCESS);
255 }
256 
257 int main(int argc, char **args)
258 {
259   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
260   Mat         A;
261   Vec         b;
262   char        jfilename[PETSC_MAX_PATH_LEN];
263   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
264   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
265   char       *matformats[5];
266   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
267   char        ordering[256] = MATORDERINGRCM;
268   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
269   IS          rowperm = NULL, colperm = NULL;
270   PetscViewer fd;
271   PetscReal   starting_spmv_time = 0, *spmv_times;
272 
273   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
274   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
275   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
276   if (!flg1) {
277     nformats = 1;
278     PetscCall(PetscStrallocpy("csr", &matformats[0]));
279   }
280   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
281   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
282   /* Read matrix and RHS */
283   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
284   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
285   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
286   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
287   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
288   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
289   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
290   PetscOptionsEnd();
291 #if !defined(PETSC_HAVE_DEVICE)
292   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
293 #endif
294   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
295   if (flg3) {
296     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
297     PetscCall(PetscCalloc1(nmat, &spmv_times));
298   } else if (flg2) {
299     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
300   } else if (flg1) {
301     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
302     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
303     PetscCall(MatSetType(A, MATAIJ));
304     PetscCall(MatSetFromOptions(A));
305     PetscCall(MatLoad(A, fd));
306     PetscCall(PetscViewerDestroy(&fd));
307   }
308   if (permute) {
309     Mat Aperm;
310     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
311     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
312     PetscCall(MatDestroy(&A));
313     A = Aperm; /* Replace original operator with permuted version */
314   }
315   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
316   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
317   if (bflg) {
318     PetscViewer fb;
319     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
320     PetscCall(VecSetFromOptions(b));
321     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
322     PetscCall(VecLoad(b, fb));
323     PetscCall(PetscViewerDestroy(&fb));
324   }
325 
326   for (j = 0; j < nformats; j++) {
327     char *petscmatformat = NULL;
328     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
329     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
330     if (flg3) { // mat names specified in a JSON file
331       for (i = 0; i < nmat; i++) {
332         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
333         if (!bflg) {
334           PetscCall(MatCreateVecs(A, &b, NULL));
335           PetscCall(VecSet(b, 1.0));
336         }
337         if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
338         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
339         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
340         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
341         PetscCall(MatDestroy(&A));
342         if (!bflg) PetscCall(VecDestroy(&b));
343       }
344       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
345       starting_spmv_time = spmv_times[nmat - 1];
346     } else {
347       PetscReal spmv_time;
348       if (!bflg) {
349         PetscCall(MatCreateVecs(A, &b, NULL));
350         PetscCall(VecSet(b, 1.0));
351       }
352       if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
353       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
354       if (!bflg) PetscCall(VecDestroy(&b));
355     }
356     PetscCall(PetscFree(petscmatformat));
357   }
358   if (flg3) {
359     for (i = 0; i < nmat; i++) {
360       free(filenames[i]);
361       free(groupnames[i]);
362       free(matnames[i]);
363     }
364     free(filenames);
365     free(groupnames);
366     free(matnames);
367     PetscCall(PetscFree(spmv_times));
368   }
369   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
370   if (flg1 || flg2) PetscCall(MatDestroy(&A));
371   if (bflg) PetscCall(VecDestroy(&b));
372   PetscCall(ISDestroy(&rowperm));
373   PetscCall(ISDestroy(&colperm));
374   PetscCall(PetscFinalize());
375   return 0;
376 }
377 /*TEST
378 
379    build:
380       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
381       depends: mmloader.c mmio.c cJSON.c
382 
383    test:
384       suffix: 1
385       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
386 
387    test:
388       suffix: 2
389       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
390       output_file: output/bench_spmv_1.out
391       requires: cuda
392 
393    test:
394       suffix: 3
395       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
396       output_file: output/bench_spmv_1.out
397       requires: hip
398 
399 TEST*/
400