xref: /petsc/src/mat/tests/bench_spmv.c (revision 773bf0f69b9b2a05ff80bbab7f5cfee096f500d4)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read the entire contents of a file into a newly allocated, NUL-terminated buffer.

  filename - path of the file to read (opened in binary mode)

  Returns a malloc'd string the caller must free(), or NULL if the file could not
  be opened, its size could not be determined, or memory allocation failed.
*/
char *read_file(const char *filename)
{
  FILE  *file    = NULL;
  long   length  = 0;
  char  *content = NULL;
  size_t read_chars;

  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (!file) return NULL;
  /* get the length; check fseek/ftell so a failure (-1) cannot turn into a huge fread */
  if (fseek(file, 0, SEEK_END) == 0 && (length = ftell(file)) >= 0 && fseek(file, 0, SEEK_SET) == 0) {
    /* +1 for the terminating NUL */
    content = (char *)malloc((size_t)length + 1);
    if (content) {
      /* read the file into memory; read_chars may be < length on text-mode quirks or error */
      read_chars          = fread(content, 1, (size_t)length, file);
      content[read_chars] = '\0';
    }
  }
  fclose(file);
  return content;
}
30 
/*
  Write a NUL-terminated string to a file, replacing any existing contents.

  filename - path of the file to (over)write in text mode
  content  - NUL-terminated string to write

  Fails silently if the file cannot be opened (matches prior behavior).
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = fopen(filename, "w");

  if (!file) return; /* previously fell through to fclose(NULL), which is undefined behavior */
  fputs(content, file);
  fclose(file);
}
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
/*
  For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
  in each repetition. As a result, each MatMult call is fresh, and we can capture
  the first-time overhead (e.g. of CuSparse SpMV) and avoid the cache effect of
  consecutive calls.
*/
/*
  TimedSpMV - Repeatedly apply A to b, optionally accumulating the wall-clock
  time of the MatMult calls alone (conversion/copy time is excluded).

  A              - the matrix, assembled in a host (CPU) format
  b              - input vector; its type is switched to the matching device
                   vector type when petscmatformat is a device format
  time           - if non-NULL, set to the summed MatMult time over all repetitions
  petscmatformat - target PETSc matrix type name (e.g. MATAIJ, MATAIJCUSPARSE, MATSELLCUDA)
  use_gpu        - if true, a fresh device copy of A is built for every repetition
  repetitions    - number of MatMult calls
*/
PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
{
  Mat            A2 = NULL;
  PetscInt       i;
  Vec            u;
  PetscLogDouble vstart = 0, vend = 0;
  PetscBool      isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;

  PetscFunctionBeginUser;
  /* Decide which device backend (if any) must back the vectors */
  PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
  PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
  PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
  if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
  if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
  if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
  PetscCall(VecDuplicate(b, &u)); /* u receives A*b; same type as b */
  if (time) *time = 0.0;
  for (i = 0; i < repetitions; i++) {
    if (use_gpu) {
      /* Rebuild the device matrix from the host copy each repetition so every
         MatMult is "cold" (see the note above) */
      PetscCall(MatDestroy(&A2));
      PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
      if (issellcuda) {
        /* no direct AIJ->SELLCUDA conversion path: go through host SELL first */
        PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2));
        PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2));
      } else if (issellhip) {
        PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2));
        PetscCall(MatConvert(A2, MATSELLHIP, MAT_INPLACE_MATRIX, &A2));
      } else {
        PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
      }
    } else A2 = A; /* CPU path: operate on A directly, no extra copy */
    /* Timing MatMult */
    if (time) PetscCall(PetscTime(&vstart));

    PetscCall(MatMult(A2, b, u));

    if (time) {
      PetscCall(PetscTime(&vend));
      *time += (PetscReal)(vend - vstart);
    }
  }
  PetscCall(VecDestroy(&u));
  /* only destroy A2 when we own it (GPU path duplicated A; CPU path aliased it) */
  if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
  PetscFunctionReturn(PETSC_SUCCESS);
}
173 
174 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
175 {
176   PetscLogEvent      event;
177   PetscEventPerfInfo eventInfo;
178   //PetscReal          gpuflopRate;
179 
180   // if (matformat) {
181   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
182   // } else {
183   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
184   // }
185   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
186   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
187 
188   PetscFunctionBeginUser;
189   PetscCall(PetscLogEventGetId("MatMult", &event));
190   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
191   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
192   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
193   if (cputime) *cputime = eventInfo.time;
194 #if defined(PETSC_HAVE_DEVICE)
195   if (gputime) *gputime = eventInfo.GpuTime;
196   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
197 #endif
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
202 {
203   PetscBool iscsr, issell, iscsrkokkos;
204 
205   PetscFunctionBeginUser;
206   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
207   if (iscsr) {
208     if (use_gpu) {
209 #if defined(PETSC_HAVE_CUDA)
210       PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
211 #endif
212 #if defined(PETSC_HAVE_HIP)
213       PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
214 #endif
215     } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
216   } else {
217     PetscCall(PetscStrcmp(matformat, "sell", &issell));
218     if (issell) {
219       if (use_gpu) {
220 #if defined(PETSC_HAVE_CUDA)
221         PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
222 #endif
223 #if defined(PETSC_HAVE_HIP)
224         PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
225 #endif
226       } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
227     } else {
228       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
229       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
230     }
231   }
232   PetscFunctionReturn(PETSC_SUCCESS);
233 }
234 
/*
  Benchmark driver: loads a matrix (PETSc binary, MatrixMarket, or a JSON list
  of MatrixMarket files), runs timed SpMV for each requested format, and for
  JSON input writes the per-matrix averages back into the JSON file.
*/
int main(int argc, char **args)
{
  PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
  Mat         A;
  Vec         b;
  char        jfilename[PETSC_MAX_PATH_LEN];
  char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
  char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
  char       *matformats[5];
  char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
  char        ordering[256] = MATORDERINGRCM;
  PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
  IS          rowperm = NULL, colperm = NULL;
  PetscViewer fd;
  PetscReal   starting_spmv_time = 0, *spmv_times;

  /* Enable GPU event timing but discard the log report; timings are read via PetscLogSpMVTime() */
  PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  /* Formats to benchmark (up to 5, e.g. csr,sell,csrkokkos); default is csr only */
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
  if (!flg1) {
    nformats = 1;
    PetscCall(PetscStrallocpy("csr", &matformats[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
  /* Read matrix and RHS: one of -ABIN (PETSc binary), -AMTX (MatrixMarket), -AJSON (JSON list) */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
  PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_DEVICE)
  PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
#endif
  PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
  if (flg3) {
    /* JSON mode: matrices are loaded one at a time inside the benchmark loop below */
    ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
    PetscCall(PetscCalloc1(nmat, &spmv_times));
  } else if (flg2) {
    PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
  } else if (flg1) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, fd));
    PetscCall(PetscViewerDestroy(&fd));
  }
  /* NOTE(review): A is only created above for -ABIN/-AMTX; combining -permute with
     -AJSON would use an uninitialized Mat here — confirm -permute is restricted to
     single-matrix runs */
  if (permute) {
    Mat Aperm;
    PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
    PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
    PetscCall(MatDestroy(&A));
    A = Aperm; /* Replace original operator with permuted version */
  }
  /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
  if (bflg) {
    /* RHS supplied as a PETSc binary vector file */
    PetscViewer fb;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
    PetscCall(VecSetFromOptions(b));
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
    PetscCall(VecLoad(b, fb));
    PetscCall(PetscViewerDestroy(&fb));
  }

  for (j = 0; j < nformats; j++) {
    char *petscmatformat = NULL;
    PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
    PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
    if (flg3) { // mat names specified in a JSON file
      for (i = 0; i < nmat; i++) {
        PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
        if (!bflg) {
          /* no RHS file: benchmark against a vector of ones */
          PetscCall(MatCreateVecs(A, &b, NULL));
          PetscCall(VecSet(b, 1.0));
        }
        PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
        /* the logged event counters are cumulative; record the running total per matrix
           (UpdateJSON takes the differences) */
        if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
        else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
        PetscCall(MatDestroy(&A));
        if (!bflg) PetscCall(VecDestroy(&b));
      }
      UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
      /* the next format starts from the current cumulative event time */
      starting_spmv_time = spmv_times[nmat - 1];
    } else {
      PetscReal spmv_time;
      if (!bflg) {
        PetscCall(MatCreateVecs(A, &b, NULL));
        PetscCall(VecSet(b, 1.0));
      }
      /* NOTE(review): spmv_time is measured but never printed or stored on this path —
         confirm whether reporting was intended here */
      PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
      if (!bflg) PetscCall(VecDestroy(&b));
    }
    PetscCall(PetscFree(petscmatformat));
  }
  if (flg3) {
    /* ParseJSON allocated with malloc(), so release with free() (not PetscFree) */
    for (i = 0; i < nmat; i++) {
      free(filenames[i]);
      free(groupnames[i]);
      free(matnames[i]);
    }
    free(filenames);
    free(groupnames);
    free(matnames);
    PetscCall(PetscFree(spmv_times));
  }
  for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
  if (flg1 || flg2) PetscCall(MatDestroy(&A));
  if (bflg) PetscCall(VecDestroy(&b));
  PetscCall(ISDestroy(&rowperm));
  PetscCall(ISDestroy(&colperm));
  PetscCall(PetscFinalize());
  return 0;
}
353 /*TEST
354 
355    build:
356       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
357       depends: mmloader.c mmio.c cJSON.c
358 
359    test:
360       suffix: 1
361       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
362 
363    test:
364       suffix: 2
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
366       output_file: output/bench_spmv_1.out
367       requires: cuda
368 
369    test:
370       suffix: 3
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
372       output_file: output/bench_spmv_1.out
373       requires: hip
374 
375 TEST*/
376