xref: /petsc/src/mat/tests/bench_spmv.c (revision 0fdf79fb08699bf9be0aa4d8ba0185e387a216c8)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read the entire contents of filename into a NUL-terminated, malloc'd buffer.

  Returns NULL if the file cannot be opened, its size cannot be determined,
  or memory cannot be allocated. The caller owns the returned buffer and
  must free() it.
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (file) {
    /* get the length; bail out (returning NULL) on seek/tell failure */
    if (fseek(file, 0, SEEK_END) == 0) {
      length = ftell(file);
      if (length >= 0 && fseek(file, 0, SEEK_SET) == 0) {
        /* allocate content buffer, +1 byte for the terminating NUL */
        content = (char *)malloc((size_t)length + sizeof(""));
        if (content) {
          /* read the file into memory; fread may return fewer bytes than requested */
          read_chars          = fread(content, sizeof(char), (size_t)length, file);
          content[read_chars] = '\0';
        }
      }
    }
    fclose(file);
  }
  return content;
}
30 
/*
  Write the NUL-terminated string content to filename in text mode,
  truncating any existing file. Failure to open the file is silently
  ignored (best-effort semantics, matching the benchmark's needs).
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = fopen(filename, "w");
  if (file) {
    fputs(content, file);
    fclose(file); /* only close a stream we actually opened; fclose(NULL) is UB */
  }
}
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
/*
  For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
  in each repetition. As a result, each MatMult call is fresh, so we can capture
  the first-time overhead (e.g. of cuSPARSE SpMV) and avoid the cache effect
  of consecutive calls.
*/
126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127 {
128   Mat            A2 = NULL;
129   PetscInt       i;
130   Vec            u;
131   PetscLogDouble vstart = 0, vend = 0;
132   PetscBool      isaijcusparse, isaijkokkos;
133 
134   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
135   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
136   if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
137   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
138   PetscCall(VecDuplicate(b, &u));
139   if (time) *time = 0.0;
140   for (i = 0; i < repetitions; i++) {
141     if (use_gpu) {
142       PetscCall(MatDestroy(&A2));
143       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
144       PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
145     } else A2 = A;
146     /* Timing MatMult */
147     if (time) PetscCall(PetscTime(&vstart));
148 
149     PetscCall(MatMult(A2, b, u));
150 
151     if (time) {
152       PetscCall(PetscTime(&vend));
153       *time += (PetscReal)(vend - vstart);
154     }
155   }
156   PetscCall(VecDestroy(&u));
157   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
158   return 0;
159 }
160 
161 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
162 {
163   PetscLogEvent      event;
164   PetscEventPerfInfo eventInfo;
165   //PetscReal          gpuflopRate;
166 
167   // if (matformat) {
168   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
169   // } else {
170   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
171   // }
172   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
173   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
174 
175   PetscCall(PetscLogEventGetId("MatMult", &event));
176   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
177   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
178   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
179   if (cputime) *cputime = eventInfo.time;
180 #if defined(PETSC_HAVE_DEVICE)
181   if (gputime) *gputime = eventInfo.GpuTime;
182   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
183 #endif
184   return 0;
185 }
186 
187 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
188 {
189   PetscBool iscsr, issell, iscsrkokkos;
190   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
191   if (iscsr) {
192     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
193     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
194   } else {
195     PetscCall(PetscStrcmp(matformat, "sell", &issell));
196     if (issell) {
197       if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
198       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
199     } else {
200       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
201       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
202     }
203   }
204   return 0;
205 }
206 
207 int main(int argc, char **args)
208 {
209   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
210   Mat         A;
211   Vec         b;
212   char        jfilename[PETSC_MAX_PATH_LEN];
213   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
214   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
215   char       *matformats[5];
216   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
217   char        ordering[256] = MATORDERINGRCM;
218   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
219   IS          rowperm = NULL, colperm = NULL;
220   PetscViewer fd;
221   PetscReal   starting_spmv_time = 0, *spmv_times;
222 
223   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
224   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
225   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
226   if (!flg1) {
227     nformats = 1;
228     PetscCall(PetscStrallocpy("csr", &matformats[0]));
229   }
230   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
231   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
232   /* Read matrix and RHS */
233   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
234   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
235   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
236   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
237   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
238   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
239   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
240   PetscOptionsEnd();
241 #if !defined(PETSC_HAVE_DEVICE)
242   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
243 #endif
244   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
245   if (flg3) {
246     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
247     PetscCall(PetscCalloc1(nmat, &spmv_times));
248   } else if (flg2) {
249     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
250   } else if (flg1) {
251     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
252     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
253     PetscCall(MatSetType(A, MATAIJ));
254     PetscCall(MatSetFromOptions(A));
255     PetscCall(MatLoad(A, fd));
256     PetscCall(PetscViewerDestroy(&fd));
257   }
258   if (permute) {
259     Mat Aperm;
260     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
261     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
262     PetscCall(MatDestroy(&A));
263     A = Aperm; /* Replace original operator with permuted version */
264   }
265   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
266   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
267   if (bflg) {
268     PetscViewer fb;
269     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
270     PetscCall(VecSetFromOptions(b));
271     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
272     PetscCall(VecLoad(b, fb));
273     PetscCall(PetscViewerDestroy(&fb));
274   }
275 
276   for (j = 0; j < nformats; j++) {
277     char *petscmatformat = NULL;
278     MapToPetscMatType(matformats[j], use_gpu, &petscmatformat);
279     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
280     if (flg3) { // mat names specified in a JSON file
281       for (i = 0; i < nmat; i++) {
282         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
283         if (!bflg) {
284           PetscCall(MatCreateVecs(A, &b, NULL));
285           PetscCall(VecSet(b, 1.0));
286         }
287         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
288         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
289         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
290         PetscCall(MatDestroy(&A));
291         if (!bflg) PetscCall(VecDestroy(&b));
292       }
293       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
294       starting_spmv_time = spmv_times[nmat - 1];
295     } else {
296       PetscReal spmv_time;
297       if (!bflg) {
298         PetscCall(MatCreateVecs(A, &b, NULL));
299         PetscCall(VecSet(b, 1.0));
300       }
301       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
302       if (!bflg) PetscCall(VecDestroy(&b));
303     }
304     PetscCall(PetscFree(petscmatformat));
305   }
306   if (flg3) {
307     for (i = 0; i < nmat; i++) {
308       free(filenames[i]);
309       free(groupnames[i]);
310       free(matnames[i]);
311     }
312     free(filenames);
313     free(groupnames);
314     free(matnames);
315     PetscCall(PetscFree(spmv_times));
316   }
317   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
318   if (flg1 || flg2) PetscCall(MatDestroy(&A));
319   if (bflg) PetscCall(VecDestroy(&b));
320   PetscCall(ISDestroy(&rowperm));
321   PetscCall(ISDestroy(&colperm));
322   PetscCall(PetscFinalize());
323   return 0;
324 }
325 /*TEST
326 
327    build:
328       requires:  !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
329       depends: mmloader.c mmio.c cJSON.c
330 
331    test:
332       suffix: 1
333       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
334 
335    test:
336       suffix: 2
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
338       output_file: output/bench_spmv_1.out
339       requires: cuda
340 
341 TEST*/
342