xref: /petsc/src/mat/tests/bench_spmv.c (revision a68bbae58a07f2fb515cab24a67de1159d72e8a2)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
7 char *read_file(const char *filename)
8 {
9   FILE  *file       = NULL;
10   long   length     = 0;
11   char  *content    = NULL;
12   size_t read_chars = 0;
13 
14   /* open in read binary mode */
15   file = fopen(filename, "rb");
16   if (file) {
17     /* get the length */
18     fseek(file, 0, SEEK_END);
19     length = ftell(file);
20     fseek(file, 0, SEEK_SET);
21     /* allocate content buffer */
22     content = (char *)malloc((size_t)length + sizeof(""));
23     /* read the file into memory */
24     read_chars          = fread(content, sizeof(char), (size_t)length, file);
25     content[read_chars] = '\0';
26     fclose(file);
27   }
28   return content;
29 }
30 
31 void write_file(const char *filename, const char *content)
32 {
33   FILE *file = NULL;
34   file       = fopen(filename, "w");
35   if (file) { fputs(content, file); }
36   fclose(file);
37 }
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
119 /*
120   For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121   The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
122   in each repetition. As a result,  each MatMult call is fresh, and we can capture
123   the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
124   during consecutive calls.
125 */
126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127 {
128   Mat            A2 = NULL;
129   PetscInt       i;
130   Vec            u;
131   PetscLogDouble vstart = 0, vend = 0;
132   PetscBool      isaijcusparse, isaijkokkos;
133 
134   PetscFunctionBeginUser;
135   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
137   if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
138   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
139   PetscCall(VecDuplicate(b, &u));
140   if (time) *time = 0.0;
141   for (i = 0; i < repetitions; i++) {
142     if (use_gpu) {
143       PetscCall(MatDestroy(&A2));
144       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
145       PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
146     } else A2 = A;
147     /* Timing MatMult */
148     if (time) PetscCall(PetscTime(&vstart));
149 
150     PetscCall(MatMult(A2, b, u));
151 
152     if (time) {
153       PetscCall(PetscTime(&vend));
154       *time += (PetscReal)(vend - vstart);
155     }
156   }
157   PetscCall(VecDestroy(&u));
158   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
159   PetscFunctionReturn(PETSC_SUCCESS);
160 }
161 
162 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
163 {
164   PetscLogEvent      event;
165   PetscEventPerfInfo eventInfo;
166   //PetscReal          gpuflopRate;
167 
168   // if (matformat) {
169   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
170   // } else {
171   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
172   // }
173   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
174   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
175 
176   PetscFunctionBeginUser;
177   PetscCall(PetscLogEventGetId("MatMult", &event));
178   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
179   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
180   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
181   if (cputime) *cputime = eventInfo.time;
182 #if defined(PETSC_HAVE_DEVICE)
183   if (gputime) *gputime = eventInfo.GpuTime;
184   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
185 #endif
186   PetscFunctionReturn(PETSC_SUCCESS);
187 }
188 
189 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
190 {
191   PetscBool iscsr, issell, iscsrkokkos;
192 
193   PetscFunctionBeginUser;
194   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
195   if (iscsr) {
196     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
197     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
198   } else {
199     PetscCall(PetscStrcmp(matformat, "sell", &issell));
200     if (issell) {
201       if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
202       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
203     } else {
204       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
205       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
206     }
207   }
208   PetscFunctionReturn(PETSC_SUCCESS);
209 }
210 
211 int main(int argc, char **args)
212 {
213   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
214   Mat         A;
215   Vec         b;
216   char        jfilename[PETSC_MAX_PATH_LEN];
217   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
218   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
219   char       *matformats[5];
220   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
221   char        ordering[256] = MATORDERINGRCM;
222   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
223   IS          rowperm = NULL, colperm = NULL;
224   PetscViewer fd;
225   PetscReal   starting_spmv_time = 0, *spmv_times;
226 
227   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
228   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
229   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
230   if (!flg1) {
231     nformats = 1;
232     PetscCall(PetscStrallocpy("csr", &matformats[0]));
233   }
234   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
235   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
236   /* Read matrix and RHS */
237   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
238   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
239   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
240   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
241   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
242   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
243   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
244   PetscOptionsEnd();
245 #if !defined(PETSC_HAVE_DEVICE)
246   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
247 #endif
248   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
249   if (flg3) {
250     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
251     PetscCall(PetscCalloc1(nmat, &spmv_times));
252   } else if (flg2) {
253     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
254   } else if (flg1) {
255     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
256     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
257     PetscCall(MatSetType(A, MATAIJ));
258     PetscCall(MatSetFromOptions(A));
259     PetscCall(MatLoad(A, fd));
260     PetscCall(PetscViewerDestroy(&fd));
261   }
262   if (permute) {
263     Mat Aperm;
264     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
265     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
266     PetscCall(MatDestroy(&A));
267     A = Aperm; /* Replace original operator with permuted version */
268   }
269   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
270   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
271   if (bflg) {
272     PetscViewer fb;
273     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
274     PetscCall(VecSetFromOptions(b));
275     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
276     PetscCall(VecLoad(b, fb));
277     PetscCall(PetscViewerDestroy(&fb));
278   }
279 
280   for (j = 0; j < nformats; j++) {
281     char *petscmatformat = NULL;
282     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
283     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
284     if (flg3) { // mat names specified in a JSON file
285       for (i = 0; i < nmat; i++) {
286         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
287         if (!bflg) {
288           PetscCall(MatCreateVecs(A, &b, NULL));
289           PetscCall(VecSet(b, 1.0));
290         }
291         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
292         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
293         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
294         PetscCall(MatDestroy(&A));
295         if (!bflg) PetscCall(VecDestroy(&b));
296       }
297       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
298       starting_spmv_time = spmv_times[nmat - 1];
299     } else {
300       PetscReal spmv_time;
301       if (!bflg) {
302         PetscCall(MatCreateVecs(A, &b, NULL));
303         PetscCall(VecSet(b, 1.0));
304       }
305       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
306       if (!bflg) PetscCall(VecDestroy(&b));
307     }
308     PetscCall(PetscFree(petscmatformat));
309   }
310   if (flg3) {
311     for (i = 0; i < nmat; i++) {
312       free(filenames[i]);
313       free(groupnames[i]);
314       free(matnames[i]);
315     }
316     free(filenames);
317     free(groupnames);
318     free(matnames);
319     PetscCall(PetscFree(spmv_times));
320   }
321   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
322   if (flg1 || flg2) PetscCall(MatDestroy(&A));
323   if (bflg) PetscCall(VecDestroy(&b));
324   PetscCall(ISDestroy(&rowperm));
325   PetscCall(ISDestroy(&colperm));
326   PetscCall(PetscFinalize());
327   return 0;
328 }
329 /*TEST
330 
331    build:
332       requires:  !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
333       depends: mmloader.c mmio.c cJSON.c
334 
335    test:
336       suffix: 1
337       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
338 
339    test:
340       suffix: 2
341       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
342       output_file: output/bench_spmv_1.out
343       requires: cuda
344 
345 TEST*/
346