xref: /petsc/src/mat/tests/bench_spmv.c (revision 017deb10d530c1b6d9744fcd772cd96c5fcd74f2)
189928cc5SHong Zhang static char help[] = "Driver for benchmarking SpMV.";
289928cc5SHong Zhang 
389928cc5SHong Zhang #include <petscmat.h>
489928cc5SHong Zhang #include "cJSON.h"
589928cc5SHong Zhang #include "mmloader.h"
689928cc5SHong Zhang 
/*
  read_file - Read a whole file into a newly allocated, NUL-terminated buffer.

  Input Parameter:
. filename - path of the file to read (opened in binary mode)

  Returns the buffer, which the caller must release with free(), or NULL when the
  file cannot be opened, its length cannot be determined, or memory is exhausted.
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  if (!filename) return NULL;
  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (!file) return NULL;
  /* get the length; ftell() reports failure as -1, which must never be used as a size */
  if (fseek(file, 0, SEEK_END) != 0 || (length = ftell(file)) < 0 || fseek(file, 0, SEEK_SET) != 0) {
    fclose(file);
    return NULL;
  }
  /* one extra byte for the terminating NUL */
  content = (char *)malloc((size_t)length + 1);
  if (content) {
    /* terminate after the bytes actually read, which may be fewer than length */
    read_chars          = fread(content, sizeof(char), (size_t)length, file);
    content[read_chars] = '\0';
  }
  fclose(file);
  return content;
}
3089928cc5SHong Zhang 
/*
  write_file - Overwrite a file with the given NUL-terminated text.

  Input Parameters:
+ filename - path of the file to create or truncate
- content  - text to write

  The call is a silent no-op when either argument is NULL or the file cannot be
  opened; in particular fclose() is never called on a NULL stream.
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = NULL;

  if (!filename || !content) return;
  file = fopen(filename, "w");
  if (!file) return;
  fputs(content, file);
  fclose(file);
}
3889928cc5SHong Zhang 
ParseJSON(const char * const inputjsonfile,char *** outputfilenames,char *** outputgroupnames,char *** outputmatnames,int * nmat)3989928cc5SHong Zhang int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
4089928cc5SHong Zhang {
4189928cc5SHong Zhang   char        *content     = read_file(inputjsonfile);
4289928cc5SHong Zhang   cJSON       *matrix_json = NULL;
4389928cc5SHong Zhang   const cJSON *problem = NULL, *elem = NULL;
4489928cc5SHong Zhang   const cJSON *item = NULL;
4589928cc5SHong Zhang   char       **filenames, **groupnames, **matnames;
4689928cc5SHong Zhang   int          i, n;
4789928cc5SHong Zhang   if (!content) return 0;
4889928cc5SHong Zhang   matrix_json = cJSON_Parse(content);
4989928cc5SHong Zhang   if (!matrix_json) return 0;
5089928cc5SHong Zhang   n          = cJSON_GetArraySize(matrix_json);
5189928cc5SHong Zhang   *nmat      = n;
5289928cc5SHong Zhang   filenames  = (char **)malloc(sizeof(char *) * n);
5389928cc5SHong Zhang   groupnames = (char **)malloc(sizeof(char *) * n);
5489928cc5SHong Zhang   matnames   = (char **)malloc(sizeof(char *) * n);
5589928cc5SHong Zhang   for (i = 0; i < n; i++) {
5689928cc5SHong Zhang     elem         = cJSON_GetArrayItem(matrix_json, i);
5789928cc5SHong Zhang     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
5889928cc5SHong Zhang     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
5989928cc5SHong Zhang     strcpy(filenames[i], item->valuestring);
6089928cc5SHong Zhang     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
6189928cc5SHong Zhang     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
6289928cc5SHong Zhang     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
6389928cc5SHong Zhang     strcpy(groupnames[i], item->valuestring);
6489928cc5SHong Zhang     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
6589928cc5SHong Zhang     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
6689928cc5SHong Zhang     strcpy(matnames[i], item->valuestring);
6789928cc5SHong Zhang   }
6889928cc5SHong Zhang   cJSON_Delete(matrix_json);
6989928cc5SHong Zhang   free(content);
7089928cc5SHong Zhang   *outputfilenames  = filenames;
7189928cc5SHong Zhang   *outputgroupnames = groupnames;
7289928cc5SHong Zhang   *outputmatnames   = matnames;
7389928cc5SHong Zhang   return 0;
7489928cc5SHong Zhang }
7589928cc5SHong Zhang 
/*
  UpdateJSON - Record SpMV timings in the JSON file, rewriting the file in place.

  For the i-th element of the top-level array the entry "spmv".<matformat> is created
  if necessary and its members "time" and "repetitions" are set (or added when absent).

  Input Parameters:
+ inputjsonfile      - path of the JSON file to read and overwrite
. spmv_times         - one time stamp per array element; the time stored for element i is
                       (spmv_times[i] - spmv_times[i-1]) / repetitions, with starting_spmv_time
                       taking the place of spmv_times[-1] for i = 0
. starting_spmv_time - time stamp preceding spmv_times[0]
. matformat          - key under which the result is stored inside "spmv"
. use_gpu            - unused
- repetitions        - number of repetitions the timings were gathered over

  Returns 0 on success and a nonzero value when the file cannot be read, parsed, or
  serialized; in those cases the file is left unmodified.
*/
int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
{
  char  *content     = read_file(inputjsonfile);
  cJSON *matrix_json = NULL;
  int    i, n;

  (void)use_gpu;
  if (!content) return 1;
  matrix_json = cJSON_Parse(content);
  free(content); /* the parsed tree holds its own copies, the raw text is no longer needed */
  if (!matrix_json) return 1;
  n = cJSON_GetArraySize(matrix_json);
  for (i = 0; i < n; i++) {
    cJSON          *elem    = cJSON_GetArrayItem(matrix_json, i);
    cJSON          *spmv    = cJSON_GetObjectItem(elem, "spmv");
    cJSON          *format  = NULL;
    cJSON          *entry   = NULL;
    const PetscReal elapsed = (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions;

    /* get-or-create "spmv" and then "spmv".<matformat> */
    if (!spmv) {
      spmv = cJSON_CreateObject();
      cJSON_AddItemToObject(elem, "spmv", spmv);
    }
    format = cJSON_GetObjectItem(spmv, matformat);
    if (!format) {
      format = cJSON_CreateObject();
      cJSON_AddItemToObject(spmv, matformat, format);
    }
    /* overwrite an existing value, otherwise add the member */
    entry = cJSON_GetObjectItem(format, "time");
    if (entry) cJSON_SetNumberValue(entry, elapsed);
    else cJSON_AddNumberToObject(format, "time", elapsed);
    entry = cJSON_GetObjectItem(format, "repetitions");
    if (entry) cJSON_SetIntValue(entry, repetitions);
    else cJSON_AddNumberToObject(format, "repetitions", repetitions);
  }
  content = cJSON_Print(matrix_json);
  cJSON_Delete(matrix_json);
  if (!content) return 1; /* serialization failed: do not truncate the input file */
  write_file(inputjsonfile, content);
  free(content);
  return 0;
}
11889928cc5SHong Zhang 
11989928cc5SHong Zhang /*
12089928cc5SHong Zhang   For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121aaa8cc7dSPierre Jolivet   The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
12289928cc5SHong Zhang   in each repetition. As a result,  each MatMult call is fresh, and we can capture
12389928cc5SHong Zhang   the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
12489928cc5SHong Zhang   during consecutive calls.
12589928cc5SHong Zhang */
TimedSpMV(Mat A,Vec b,PetscReal * time,const char * petscmatformat,PetscBool use_gpu,PetscInt repetitions)12689928cc5SHong Zhang PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
12789928cc5SHong Zhang {
12889928cc5SHong Zhang   Mat            A2 = NULL;
12989928cc5SHong Zhang   PetscInt       i;
13089928cc5SHong Zhang   Vec            u;
13189928cc5SHong Zhang   PetscLogDouble vstart = 0, vend = 0;
132773bf0f6SHong Zhang   PetscBool      isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
13389928cc5SHong Zhang 
1343ba16761SJacob Faibussowitsch   PetscFunctionBeginUser;
13589928cc5SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136773bf0f6SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
13789928cc5SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
138d4fb9cc0SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
139773bf0f6SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
140d4fb9cc0SHong Zhang   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
14189928cc5SHong Zhang   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
142773bf0f6SHong Zhang   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
14389928cc5SHong Zhang   PetscCall(VecDuplicate(b, &u));
14489928cc5SHong Zhang   if (time) *time = 0.0;
14589928cc5SHong Zhang   for (i = 0; i < repetitions; i++) {
14689928cc5SHong Zhang     if (use_gpu) {
14789928cc5SHong Zhang       PetscCall(MatDestroy(&A2));
14889928cc5SHong Zhang       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
149b79e0553SHong Zhang       PetscCall(MatSetType(A2, petscmatformat));
150f640565fSHong Zhang       PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
15189928cc5SHong Zhang     } else A2 = A;
15289928cc5SHong Zhang     /* Timing MatMult */
15389928cc5SHong Zhang     if (time) PetscCall(PetscTime(&vstart));
15489928cc5SHong Zhang 
15589928cc5SHong Zhang     PetscCall(MatMult(A2, b, u));
15689928cc5SHong Zhang 
15789928cc5SHong Zhang     if (time) {
15889928cc5SHong Zhang       PetscCall(PetscTime(&vend));
15989928cc5SHong Zhang       *time += (PetscReal)(vend - vstart);
16089928cc5SHong Zhang     }
16189928cc5SHong Zhang   }
16289928cc5SHong Zhang   PetscCall(VecDestroy(&u));
16389928cc5SHong Zhang   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
1643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16589928cc5SHong Zhang }
16689928cc5SHong Zhang 
WarmUpDevice(Mat A,Vec b,const char * petscmatformat)167f640565fSHong Zhang PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
168f640565fSHong Zhang {
169b79e0553SHong Zhang   Mat           A2 = NULL;
170f640565fSHong Zhang   PetscLogEvent event;
171f640565fSHong Zhang   Vec           u;
172f640565fSHong Zhang   PetscBool     isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
173f640565fSHong Zhang 
174f640565fSHong Zhang   PetscFunctionBeginUser;
175f640565fSHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
176f640565fSHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
177f640565fSHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
178f640565fSHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
179f640565fSHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
180f640565fSHong Zhang   if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
181f640565fSHong Zhang   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
182f640565fSHong Zhang   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
183f640565fSHong Zhang   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
184f640565fSHong Zhang   PetscCall(VecDuplicate(b, &u));
185b79e0553SHong Zhang   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
186b79e0553SHong Zhang   PetscCall(MatSetType(A2, petscmatformat));
187f640565fSHong Zhang   PetscCall(PetscLogEventGetId("MatMult", &event));
188f640565fSHong Zhang   PetscCall(PetscLogEventDeactivatePush(event));
189b79e0553SHong Zhang   PetscCall(MatMult(A2, b, u));
190f640565fSHong Zhang   PetscCall(PetscLogEventDeactivatePop(event));
191f640565fSHong Zhang   PetscCall(VecDestroy(&u));
192b79e0553SHong Zhang   PetscCall(MatDestroy(&A2));
193f640565fSHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
194f640565fSHong Zhang }
195f640565fSHong Zhang 
PetscLogSpMVTime(PetscReal * gputime,PetscReal * cputime,PetscReal * gpuflops,const char * petscmatformat)19689928cc5SHong Zhang PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
19789928cc5SHong Zhang {
19889928cc5SHong Zhang   PetscLogEvent      event;
19989928cc5SHong Zhang   PetscEventPerfInfo eventInfo;
20089928cc5SHong Zhang   // PetscReal          gpuflopRate;
20189928cc5SHong Zhang 
20289928cc5SHong Zhang   // if (matformat) {
20389928cc5SHong Zhang   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
20489928cc5SHong Zhang   // } else {
20589928cc5SHong Zhang   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
20689928cc5SHong Zhang   // }
20789928cc5SHong Zhang   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
20889928cc5SHong Zhang   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
20989928cc5SHong Zhang 
2103ba16761SJacob Faibussowitsch   PetscFunctionBeginUser;
21189928cc5SHong Zhang   PetscCall(PetscLogEventGetId("MatMult", &event));
21289928cc5SHong Zhang   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
21389928cc5SHong Zhang   // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
21489928cc5SHong Zhang   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
21589928cc5SHong Zhang   if (cputime) *cputime = eventInfo.time;
21689928cc5SHong Zhang #if defined(PETSC_HAVE_DEVICE)
21789928cc5SHong Zhang   if (gputime) *gputime = eventInfo.GpuTime;
21889928cc5SHong Zhang   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
21989928cc5SHong Zhang #endif
2203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22189928cc5SHong Zhang }
22289928cc5SHong Zhang 
MapToPetscMatType(const char * matformat,PetscBool use_gpu,char ** petscmatformat)22389928cc5SHong Zhang PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
22489928cc5SHong Zhang {
22589928cc5SHong Zhang   PetscBool iscsr, issell, iscsrkokkos;
2263ba16761SJacob Faibussowitsch 
2273ba16761SJacob Faibussowitsch   PetscFunctionBeginUser;
22889928cc5SHong Zhang   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
22989928cc5SHong Zhang   if (iscsr) {
230773bf0f6SHong Zhang     if (use_gpu) {
231773bf0f6SHong Zhang #if defined(PETSC_HAVE_CUDA)
232773bf0f6SHong Zhang       PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
233773bf0f6SHong Zhang #endif
234773bf0f6SHong Zhang #if defined(PETSC_HAVE_HIP)
235773bf0f6SHong Zhang       PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
236773bf0f6SHong Zhang #endif
237773bf0f6SHong Zhang     } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
23889928cc5SHong Zhang   } else {
23989928cc5SHong Zhang     PetscCall(PetscStrcmp(matformat, "sell", &issell));
24089928cc5SHong Zhang     if (issell) {
241773bf0f6SHong Zhang       if (use_gpu) {
242773bf0f6SHong Zhang #if defined(PETSC_HAVE_CUDA)
243773bf0f6SHong Zhang         PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
244773bf0f6SHong Zhang #endif
245773bf0f6SHong Zhang #if defined(PETSC_HAVE_HIP)
246773bf0f6SHong Zhang         PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
247773bf0f6SHong Zhang #endif
248773bf0f6SHong Zhang       } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
24989928cc5SHong Zhang     } else {
25089928cc5SHong Zhang       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
25189928cc5SHong Zhang       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
25289928cc5SHong Zhang     }
25389928cc5SHong Zhang   }
2543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25589928cc5SHong Zhang }
25689928cc5SHong Zhang 
main(int argc,char ** args)25789928cc5SHong Zhang int main(int argc, char **args)
25889928cc5SHong Zhang {
25989928cc5SHong Zhang   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
26089928cc5SHong Zhang   Mat         A;
26189928cc5SHong Zhang   Vec         b;
26289928cc5SHong Zhang   char        jfilename[PETSC_MAX_PATH_LEN];
26389928cc5SHong Zhang   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
26489928cc5SHong Zhang   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
26589928cc5SHong Zhang   char       *matformats[5];
26689928cc5SHong Zhang   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
267f06bc391SHong Zhang   char        ordering[256] = MATORDERINGRCM;
268f06bc391SHong Zhang   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
269f06bc391SHong Zhang   IS          rowperm = NULL, colperm = NULL;
27089928cc5SHong Zhang   PetscViewer fd;
27189928cc5SHong Zhang   PetscReal   starting_spmv_time = 0, *spmv_times;
27289928cc5SHong Zhang 
273*27d4e10aSPierre Jolivet   PetscCall(PetscOptionsInsertString(NULL, "-log_view :/dev/null"));
274*27d4e10aSPierre Jolivet   if (PetscDefined(HAVE_DEVICE)) PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time"));
275c8025a54SPierre Jolivet   PetscCall(PetscInitialize(&argc, &args, NULL, help));
27689928cc5SHong Zhang   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
27789928cc5SHong Zhang   if (!flg1) {
27889928cc5SHong Zhang     nformats = 1;
27989928cc5SHong Zhang     PetscCall(PetscStrallocpy("csr", &matformats[0]));
28089928cc5SHong Zhang   }
28189928cc5SHong Zhang   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
28289928cc5SHong Zhang   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
28389928cc5SHong Zhang   /* Read matrix and RHS */
28489928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
28589928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
28689928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
28789928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
28889928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
289f06bc391SHong Zhang   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
290f06bc391SHong Zhang   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
291f06bc391SHong Zhang   PetscOptionsEnd();
29289928cc5SHong Zhang #if !defined(PETSC_HAVE_DEVICE)
29389928cc5SHong Zhang   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
29489928cc5SHong Zhang #endif
29589928cc5SHong Zhang   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
29689928cc5SHong Zhang   if (flg3) {
29789928cc5SHong Zhang     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
29889928cc5SHong Zhang     PetscCall(PetscCalloc1(nmat, &spmv_times));
29989928cc5SHong Zhang   } else if (flg2) {
30089928cc5SHong Zhang     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
30189928cc5SHong Zhang   } else if (flg1) {
30289928cc5SHong Zhang     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
30389928cc5SHong Zhang     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
30489928cc5SHong Zhang     PetscCall(MatSetType(A, MATAIJ));
30589928cc5SHong Zhang     PetscCall(MatSetFromOptions(A));
30689928cc5SHong Zhang     PetscCall(MatLoad(A, fd));
30789928cc5SHong Zhang     PetscCall(PetscViewerDestroy(&fd));
30889928cc5SHong Zhang   }
309f06bc391SHong Zhang   if (permute) {
310f06bc391SHong Zhang     Mat Aperm;
311f06bc391SHong Zhang     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
312f06bc391SHong Zhang     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
313f06bc391SHong Zhang     PetscCall(MatDestroy(&A));
314f06bc391SHong Zhang     A = Aperm; /* Replace original operator with permuted version */
315f06bc391SHong Zhang   }
31689928cc5SHong Zhang   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
31789928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
31889928cc5SHong Zhang   if (bflg) {
31989928cc5SHong Zhang     PetscViewer fb;
32089928cc5SHong Zhang     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
32189928cc5SHong Zhang     PetscCall(VecSetFromOptions(b));
32289928cc5SHong Zhang     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
32389928cc5SHong Zhang     PetscCall(VecLoad(b, fb));
32489928cc5SHong Zhang     PetscCall(PetscViewerDestroy(&fb));
32589928cc5SHong Zhang   }
32689928cc5SHong Zhang 
32789928cc5SHong Zhang   for (j = 0; j < nformats; j++) {
32889928cc5SHong Zhang     char *petscmatformat = NULL;
3293ba16761SJacob Faibussowitsch     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
33089928cc5SHong Zhang     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
33189928cc5SHong Zhang     if (flg3) { // mat names specified in a JSON file
33289928cc5SHong Zhang       for (i = 0; i < nmat; i++) {
33389928cc5SHong Zhang         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
33489928cc5SHong Zhang         if (!bflg) {
33589928cc5SHong Zhang           PetscCall(MatCreateVecs(A, &b, NULL));
33689928cc5SHong Zhang           PetscCall(VecSet(b, 1.0));
33789928cc5SHong Zhang         }
338f640565fSHong Zhang         if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
33989928cc5SHong Zhang         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
34089928cc5SHong Zhang         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
34189928cc5SHong Zhang         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
34289928cc5SHong Zhang         PetscCall(MatDestroy(&A));
34389928cc5SHong Zhang         if (!bflg) PetscCall(VecDestroy(&b));
34489928cc5SHong Zhang       }
34589928cc5SHong Zhang       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
34689928cc5SHong Zhang       starting_spmv_time = spmv_times[nmat - 1];
34789928cc5SHong Zhang     } else {
34889928cc5SHong Zhang       PetscReal spmv_time;
34989928cc5SHong Zhang       if (!bflg) {
35089928cc5SHong Zhang         PetscCall(MatCreateVecs(A, &b, NULL));
35189928cc5SHong Zhang         PetscCall(VecSet(b, 1.0));
35289928cc5SHong Zhang       }
353f640565fSHong Zhang       if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
35489928cc5SHong Zhang       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
35589928cc5SHong Zhang       if (!bflg) PetscCall(VecDestroy(&b));
35689928cc5SHong Zhang     }
35789928cc5SHong Zhang     PetscCall(PetscFree(petscmatformat));
35889928cc5SHong Zhang   }
35989928cc5SHong Zhang   if (flg3) {
36089928cc5SHong Zhang     for (i = 0; i < nmat; i++) {
36189928cc5SHong Zhang       free(filenames[i]);
36289928cc5SHong Zhang       free(groupnames[i]);
36389928cc5SHong Zhang       free(matnames[i]);
36489928cc5SHong Zhang     }
36589928cc5SHong Zhang     free(filenames);
36689928cc5SHong Zhang     free(groupnames);
36789928cc5SHong Zhang     free(matnames);
36889928cc5SHong Zhang     PetscCall(PetscFree(spmv_times));
36989928cc5SHong Zhang   }
37089928cc5SHong Zhang   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
37189928cc5SHong Zhang   if (flg1 || flg2) PetscCall(MatDestroy(&A));
37289928cc5SHong Zhang   if (bflg) PetscCall(VecDestroy(&b));
373f06bc391SHong Zhang   PetscCall(ISDestroy(&rowperm));
374f06bc391SHong Zhang   PetscCall(ISDestroy(&colperm));
37589928cc5SHong Zhang   PetscCall(PetscFinalize());
37689928cc5SHong Zhang   return 0;
37789928cc5SHong Zhang }
37889928cc5SHong Zhang /*TEST
37989928cc5SHong Zhang 
38089928cc5SHong Zhang    build:
38189928cc5SHong Zhang       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
38289928cc5SHong Zhang       depends: mmloader.c mmio.c cJSON.c
38389928cc5SHong Zhang 
38489928cc5SHong Zhang    test:
38589928cc5SHong Zhang       suffix: 1
38689928cc5SHong Zhang       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
3873886731fSPierre Jolivet       output_file: output/empty.out
38889928cc5SHong Zhang 
38989928cc5SHong Zhang    test:
39089928cc5SHong Zhang       suffix: 2
39189928cc5SHong Zhang       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
3923886731fSPierre Jolivet       output_file: output/empty.out
39389928cc5SHong Zhang       requires: cuda
39489928cc5SHong Zhang 
395773bf0f6SHong Zhang    test:
396773bf0f6SHong Zhang       suffix: 3
397773bf0f6SHong Zhang       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
3983886731fSPierre Jolivet       output_file: output/empty.out
399773bf0f6SHong Zhang       requires: hip
400773bf0f6SHong Zhang 
40189928cc5SHong Zhang TEST*/
402