xref: /petsc/src/mat/tests/bench_spmv.c (revision 017deb10d530c1b6d9744fcd772cd96c5fcd74f2)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read an entire file into a NUL-terminated, malloc'd buffer.

  Returns the buffer (caller must free()), or NULL if the file cannot be
  opened, seeking/telling fails, or allocation fails.
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (file) {
    /* get the length; bail out (returning NULL) if the stream is not seekable */
    if (fseek(file, 0, SEEK_END) == 0 && (length = ftell(file)) >= 0 && fseek(file, 0, SEEK_SET) == 0) {
      /* allocate content buffer (+1 for the terminating NUL) */
      content = (char *)malloc((size_t)length + sizeof(""));
      if (content) {
        /* read the file into memory; fread may return fewer chars than requested */
        read_chars          = fread(content, sizeof(char), (size_t)length, file);
        content[read_chars] = '\0';
      }
    }
    fclose(file);
  }
  return content;
}
30 
/*
  Write a NUL-terminated string to filename, truncating any existing file.

  Silently does nothing if the file cannot be opened.  (The previous version
  called fclose(NULL) on open failure, which is undefined behavior.)
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = fopen(filename, "w");
  if (file) {
    fputs(content, file);
    fclose(file);
  }
}
38 
ParseJSON(const char * const inputjsonfile,char *** outputfilenames,char *** outputgroupnames,char *** outputmatnames,int * nmat)39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
/*
  Record per-matrix SpMV timings back into the JSON file.

  spmv_times[i] holds the *cumulative* MatMult event time after benchmarking
  matrix i, so the time attributed to matrix i is the difference from the
  previous entry (or from starting_spmv_time for i == 0), divided by the
  number of repetitions.  Creates the "spmv"/<matformat> objects on demand,
  then rewrites inputjsonfile in place.  Always returns 0.
*/
int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
{
  char  *content     = read_file(inputjsonfile);
  cJSON *matrix_json = NULL;
  cJSON *elem        = NULL;
  int    i, n;

  if (!content) return 0;
  matrix_json = cJSON_Parse(content);
  free(content); /* previously leaked when parsing failed */
  if (!matrix_json) return 0;
  n = cJSON_GetArraySize(matrix_json);
  for (i = 0; i < n; i++) {
    cJSON *spmv   = NULL;
    cJSON *format = NULL;
    elem          = cJSON_GetArrayItem(matrix_json, i);
    spmv          = cJSON_GetObjectItem(elem, "spmv");
    if (spmv) {
      format = cJSON_GetObjectItem(spmv, matformat);
      if (format) {
        /* entry exists from a previous run: overwrite in place */
        cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
        cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
      } else {
        format = cJSON_CreateObject();
        cJSON_AddItemToObject(spmv, matformat, format);
        cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
        cJSON_AddNumberToObject(format, "repetitions", repetitions);
      }
    } else {
      spmv = cJSON_CreateObject();
      cJSON_AddItemToObject(elem, "spmv", spmv);
      format = cJSON_CreateObject();
      cJSON_AddItemToObject(spmv, matformat, format);
      cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
      cJSON_AddNumberToObject(format, "repetitions", repetitions);
    }
  }
  content = cJSON_Print(matrix_json);
  if (content) { /* cJSON_Print can fail; don't hand NULL to fputs */
    write_file(inputjsonfile, content);
    free(content);
  }
  cJSON_Delete(matrix_json);
  return 0;
}
118 
119 /*
120   For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121   The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
122   in each repetition. As a result,  each MatMult call is fresh, and we can capture
123   the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
124   during consecutive calls.
125 */
TimedSpMV(Mat A,Vec b,PetscReal * time,const char * petscmatformat,PetscBool use_gpu,PetscInt repetitions)126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127 {
128   Mat            A2 = NULL;
129   PetscInt       i;
130   Vec            u;
131   PetscLogDouble vstart = 0, vend = 0;
132   PetscBool      isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
133 
134   PetscFunctionBeginUser;
135   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
137   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
138   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
139   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
140   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
141   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
142   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
143   PetscCall(VecDuplicate(b, &u));
144   if (time) *time = 0.0;
145   for (i = 0; i < repetitions; i++) {
146     if (use_gpu) {
147       PetscCall(MatDestroy(&A2));
148       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
149       PetscCall(MatSetType(A2, petscmatformat));
150       PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
151     } else A2 = A;
152     /* Timing MatMult */
153     if (time) PetscCall(PetscTime(&vstart));
154 
155     PetscCall(MatMult(A2, b, u));
156 
157     if (time) {
158       PetscCall(PetscTime(&vend));
159       *time += (PetscReal)(vend - vstart);
160     }
161   }
162   PetscCall(VecDestroy(&u));
163   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
164   PetscFunctionReturn(PETSC_SUCCESS);
165 }
166 
WarmUpDevice(Mat A,Vec b,const char * petscmatformat)167 PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
168 {
169   Mat           A2 = NULL;
170   PetscLogEvent event;
171   Vec           u;
172   PetscBool     isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
173 
174   PetscFunctionBeginUser;
175   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
176   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
177   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
178   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
179   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
180   if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
181   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
182   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
183   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
184   PetscCall(VecDuplicate(b, &u));
185   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
186   PetscCall(MatSetType(A2, petscmatformat));
187   PetscCall(PetscLogEventGetId("MatMult", &event));
188   PetscCall(PetscLogEventDeactivatePush(event));
189   PetscCall(MatMult(A2, b, u));
190   PetscCall(PetscLogEventDeactivatePop(event));
191   PetscCall(VecDestroy(&u));
192   PetscCall(MatDestroy(&A2));
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
PetscLogSpMVTime(PetscReal * gputime,PetscReal * cputime,PetscReal * gpuflops,const char * petscmatformat)196 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
197 {
198   PetscLogEvent      event;
199   PetscEventPerfInfo eventInfo;
200   // PetscReal          gpuflopRate;
201 
202   // if (matformat) {
203   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
204   // } else {
205   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
206   // }
207   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
208   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
209 
210   PetscFunctionBeginUser;
211   PetscCall(PetscLogEventGetId("MatMult", &event));
212   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
213   // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
214   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
215   if (cputime) *cputime = eventInfo.time;
216 #if defined(PETSC_HAVE_DEVICE)
217   if (gputime) *gputime = eventInfo.GpuTime;
218   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
219 #endif
220   PetscFunctionReturn(PETSC_SUCCESS);
221 }
222 
MapToPetscMatType(const char * matformat,PetscBool use_gpu,char ** petscmatformat)223 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
224 {
225   PetscBool iscsr, issell, iscsrkokkos;
226 
227   PetscFunctionBeginUser;
228   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
229   if (iscsr) {
230     if (use_gpu) {
231 #if defined(PETSC_HAVE_CUDA)
232       PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
233 #endif
234 #if defined(PETSC_HAVE_HIP)
235       PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
236 #endif
237     } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
238   } else {
239     PetscCall(PetscStrcmp(matformat, "sell", &issell));
240     if (issell) {
241       if (use_gpu) {
242 #if defined(PETSC_HAVE_CUDA)
243         PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
244 #endif
245 #if defined(PETSC_HAVE_HIP)
246         PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
247 #endif
248       } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
249     } else {
250       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
251       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
252     }
253   }
254   PetscFunctionReturn(PETSC_SUCCESS);
255 }
256 
/*
  Driver: reads one matrix (-ABIN binary / -AMTX MatrixMarket) or a list of
  matrices (-AJSON), optionally permutes it (-permute <ordering>), then times
  SpMV for each requested format (-formats, default "csr"), on CPU or GPU
  (-use_gpu), for -repetitions iterations.  JSON runs write timings back into
  the JSON file via UpdateJSON.
*/
int main(int argc, char **args)
{
  PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
  Mat         A;
  Vec         b;
  char        jfilename[PETSC_MAX_PATH_LEN];
  char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
  char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
  char       *matformats[5];
  char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
  char        ordering[256] = MATORDERINGRCM;
  PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
  IS          rowperm = NULL, colperm = NULL;
  PetscViewer fd;
  PetscReal   starting_spmv_time = 0, *spmv_times;

  /* Options must be inserted before PetscInitialize so logging is configured
     from the start; the log output itself is discarded (timings are queried
     programmatically via PetscLogSpMVTime) */
  PetscCall(PetscOptionsInsertString(NULL, "-log_view :/dev/null"));
  if (PetscDefined(HAVE_DEVICE)) PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time"));
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
  if (!flg1) {
    /* default to a single CSR run when -formats is not given */
    nformats = 1;
    PetscCall(PetscStrallocpy("csr", &matformats[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
  /* Read matrix and RHS */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
  PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_DEVICE)
  PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
#endif
  PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
  if (flg3) {
    /* JSON mode: matrices are loaded one at a time inside the format loop below */
    ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
    PetscCall(PetscCalloc1(nmat, &spmv_times));
  } else if (flg2) {
    PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
  } else if (flg1) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, fd));
    PetscCall(PetscViewerDestroy(&fd));
  }
  if (permute) {
    Mat Aperm;
    PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
    PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
    PetscCall(MatDestroy(&A));
    A = Aperm; /* Replace original operator with permuted version */
  }
  /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
  if (bflg) {
    PetscViewer fb;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
    PetscCall(VecSetFromOptions(b));
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
    PetscCall(VecLoad(b, fb));
    PetscCall(PetscViewerDestroy(&fb));
  }

  for (j = 0; j < nformats; j++) {
    char *petscmatformat = NULL;
    PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
    PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
    if (flg3) { // mat names specified in a JSON file
      for (i = 0; i < nmat; i++) {
        PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
        if (!bflg) {
          /* no RHS file given: benchmark against a vector of ones */
          PetscCall(MatCreateVecs(A, &b, NULL));
          PetscCall(VecSet(b, 1.0));
        }
        if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
        PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
        /* spmv_times[i] is the cumulative logged MatMult time; UpdateJSON
           takes per-matrix differences */
        if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
        else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
        PetscCall(MatDestroy(&A));
        if (!bflg) PetscCall(VecDestroy(&b));
      }
      UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
      /* next format's first matrix starts from the current cumulative total */
      starting_spmv_time = spmv_times[nmat - 1];
    } else {
      PetscReal spmv_time;
      if (!bflg) {
        PetscCall(MatCreateVecs(A, &b, NULL));
        PetscCall(VecSet(b, 1.0));
      }
      if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
      PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
      if (!bflg) PetscCall(VecDestroy(&b));
    }
    PetscCall(PetscFree(petscmatformat));
  }
  if (flg3) {
    /* ParseJSON allocations use plain malloc, so release with free */
    for (i = 0; i < nmat; i++) {
      free(filenames[i]);
      free(groupnames[i]);
      free(matnames[i]);
    }
    free(filenames);
    free(groupnames);
    free(matnames);
    PetscCall(PetscFree(spmv_times));
  }
  for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
  if (flg1 || flg2) PetscCall(MatDestroy(&A));
  if (bflg) PetscCall(VecDestroy(&b));
  PetscCall(ISDestroy(&rowperm));
  PetscCall(ISDestroy(&colperm));
  PetscCall(PetscFinalize());
  return 0;
}
378 /*TEST
379 
380    build:
381       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
382       depends: mmloader.c mmio.c cJSON.c
383 
384    test:
385       suffix: 1
386       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
387       output_file: output/empty.out
388 
389    test:
390       suffix: 2
391       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
392       output_file: output/empty.out
393       requires: cuda
394 
395    test:
396       suffix: 3
397       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
398       output_file: output/empty.out
399       requires: hip
400 
401 TEST*/
402