xref: /petsc/src/mat/tests/bench_spmv.c (revision 66af8762ec03dbef0e079729eb2a1734a35ed7ff)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read the entire contents of a file into a newly malloc'ed, NUL-terminated buffer.

  filename - path of the file to read (opened in binary mode)

  Returns the buffer (caller must free()), or NULL if the file cannot be opened,
  its size cannot be determined, or the allocation fails.
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (file) {
    /* get the length; ftell() returns -1 on failure, so guard before using it as a size */
    if (fseek(file, 0, SEEK_END) == 0 && (length = ftell(file)) >= 0 && fseek(file, 0, SEEK_SET) == 0) {
      /* allocate content buffer, one extra byte for the NUL terminator */
      content = (char *)malloc((size_t)length + sizeof(""));
      if (content) {
        /* read the file into memory; fread may return fewer bytes than requested */
        read_chars          = fread(content, sizeof(char), (size_t)length, file);
        content[read_chars] = '\0';
      }
    }
    fclose(file);
  }
  return content;
}
30 
/*
  Write a NUL-terminated string to a file, replacing any existing contents.

  filename - path of the file to (over)write
  content  - string to write

  A failure to open the file is silently ignored (best-effort semantics).
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = fopen(filename, "w");
  if (file) {
    fputs(content, file);
    fclose(file); /* only close a stream we actually opened; fclose(NULL) is undefined behavior */
  }
}
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
/*
  For GPU formats, we keep two copies of the matrix on the CPU and one copy on
  the GPU. The extra CPU copy allows us to destroy the GPU matrix and recreate
  it efficiently in each repetition. As a result, each MatMult call is fresh:
  we capture the first-time overhead (e.g. of cuSPARSE SpMV) and avoid cache
  effects from consecutive calls.
*/
/*
  A              - the matrix to multiply (kept on the CPU; copied/converted per repetition on the GPU path)
  b              - right-hand-side vector; its type is switched to match the device format
  time           - if non-NULL, receives the summed wall-clock time of all MatMult calls
  petscmatformat - target PETSc matrix type name for the benchmark
  use_gpu        - if true, rebuild the device matrix from A on every repetition
  repetitions    - number of MatMult calls to perform
*/
PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
{
  Mat            A2 = NULL;
  PetscInt       i;
  Vec            u;
  PetscLogDouble vstart = 0, vend = 0;
  PetscBool      isaijcusparse, isaijkokkos, issellcuda;

  PetscFunctionBeginUser;
  /* pick the vector type matching the requested matrix format */
  PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
  PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
  if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
  if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
  PetscCall(VecDuplicate(b, &u)); /* result vector with the same layout and type as b */
  if (time) *time = 0.0;
  for (i = 0; i < repetitions; i++) {
    if (use_gpu) {
      /* rebuild the device matrix from the CPU copy so each MatMult is fresh
         (captures first-call overhead, avoids cache effects) */
      PetscCall(MatDestroy(&A2));
      PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
      if (issellcuda) {
        /* convert via MATSELL first -- presumably no direct AIJ->SELLCUDA conversion; confirm */
        PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2));
        PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2));
      } else {
        PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
      }
    } else A2 = A; /* CPU path reuses A directly, no copy */
    /* Timing MatMult */
    if (time) PetscCall(PetscTime(&vstart));

    PetscCall(MatMult(A2, b, u));

    if (time) {
      PetscCall(PetscTime(&vend));
      *time += (PetscReal)(vend - vstart);
    }
  }
  PetscCall(VecDestroy(&u));
  /* A2 is owned by this function only on the GPU path; on the CPU path it aliases A */
  if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
  PetscFunctionReturn(PETSC_SUCCESS);
}
167 
168 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
169 {
170   PetscLogEvent      event;
171   PetscEventPerfInfo eventInfo;
172   //PetscReal          gpuflopRate;
173 
174   // if (matformat) {
175   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
176   // } else {
177   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
178   // }
179   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
180   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
181 
182   PetscFunctionBeginUser;
183   PetscCall(PetscLogEventGetId("MatMult", &event));
184   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
185   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
186   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
187   if (cputime) *cputime = eventInfo.time;
188 #if defined(PETSC_HAVE_DEVICE)
189   if (gputime) *gputime = eventInfo.GpuTime;
190   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
191 #endif
192   PetscFunctionReturn(PETSC_SUCCESS);
193 }
194 
195 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
196 {
197   PetscBool iscsr, issell, iscsrkokkos;
198 
199   PetscFunctionBeginUser;
200   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
201   if (iscsr) {
202     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
203     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
204   } else {
205     PetscCall(PetscStrcmp(matformat, "sell", &issell));
206     if (issell) {
207       if (use_gpu) PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
208       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
209     } else {
210       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
211       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
212     }
213   }
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 int main(int argc, char **args)
218 {
219   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
220   Mat         A;
221   Vec         b;
222   char        jfilename[PETSC_MAX_PATH_LEN];
223   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
224   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
225   char       *matformats[5];
226   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
227   char        ordering[256] = MATORDERINGRCM;
228   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
229   IS          rowperm = NULL, colperm = NULL;
230   PetscViewer fd;
231   PetscReal   starting_spmv_time = 0, *spmv_times;
232 
233   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
234   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
235   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
236   if (!flg1) {
237     nformats = 1;
238     PetscCall(PetscStrallocpy("csr", &matformats[0]));
239   }
240   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
241   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
242   /* Read matrix and RHS */
243   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
244   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
245   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
246   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
247   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
248   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
249   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
250   PetscOptionsEnd();
251 #if !defined(PETSC_HAVE_DEVICE)
252   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
253 #endif
254   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
255   if (flg3) {
256     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
257     PetscCall(PetscCalloc1(nmat, &spmv_times));
258   } else if (flg2) {
259     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
260   } else if (flg1) {
261     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
262     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
263     PetscCall(MatSetType(A, MATAIJ));
264     PetscCall(MatSetFromOptions(A));
265     PetscCall(MatLoad(A, fd));
266     PetscCall(PetscViewerDestroy(&fd));
267   }
268   if (permute) {
269     Mat Aperm;
270     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
271     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
272     PetscCall(MatDestroy(&A));
273     A = Aperm; /* Replace original operator with permuted version */
274   }
275   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
276   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
277   if (bflg) {
278     PetscViewer fb;
279     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
280     PetscCall(VecSetFromOptions(b));
281     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
282     PetscCall(VecLoad(b, fb));
283     PetscCall(PetscViewerDestroy(&fb));
284   }
285 
286   for (j = 0; j < nformats; j++) {
287     char *petscmatformat = NULL;
288     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
289     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
290     if (flg3) { // mat names specified in a JSON file
291       for (i = 0; i < nmat; i++) {
292         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
293         if (!bflg) {
294           PetscCall(MatCreateVecs(A, &b, NULL));
295           PetscCall(VecSet(b, 1.0));
296         }
297         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
298         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
299         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
300         PetscCall(MatDestroy(&A));
301         if (!bflg) PetscCall(VecDestroy(&b));
302       }
303       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
304       starting_spmv_time = spmv_times[nmat - 1];
305     } else {
306       PetscReal spmv_time;
307       if (!bflg) {
308         PetscCall(MatCreateVecs(A, &b, NULL));
309         PetscCall(VecSet(b, 1.0));
310       }
311       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
312       if (!bflg) PetscCall(VecDestroy(&b));
313     }
314     PetscCall(PetscFree(petscmatformat));
315   }
316   if (flg3) {
317     for (i = 0; i < nmat; i++) {
318       free(filenames[i]);
319       free(groupnames[i]);
320       free(matnames[i]);
321     }
322     free(filenames);
323     free(groupnames);
324     free(matnames);
325     PetscCall(PetscFree(spmv_times));
326   }
327   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
328   if (flg1 || flg2) PetscCall(MatDestroy(&A));
329   if (bflg) PetscCall(VecDestroy(&b));
330   PetscCall(ISDestroy(&rowperm));
331   PetscCall(ISDestroy(&colperm));
332   PetscCall(PetscFinalize());
333   return 0;
334 }
335 /*TEST
336 
337    build:
338       requires:  !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
339       depends: mmloader.c mmio.c cJSON.c
340 
341    test:
342       suffix: 1
343       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
344 
345    test:
346       suffix: 2
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
348       output_file: output/bench_spmv_1.out
349       requires: cuda
350 
351 TEST*/
352