xref: /petsc/src/mat/tests/ex6k.kokkos.cxx (revision 6c5693054f5123506dab0f5da2d352ed973d0e50)
static char help[] = "Benchmarking MatProduct with AIJ and its subclass matrix types\n";

/*
Usage:
  mpiexec -n <np> ./ex6k
    -A <filename>     : input PETSc binary file for matrix A; one can convert a file from MatrixMarket using mat/tests/ex72.c
    -P <filename>     : input PETSc binary file for matrix P; optional, if not given, P = A
    -mat_type  <str>  : aij or its subclass. Default is aij.
    -prod_type <str>  : AP, AtP, APt, PtAP or PAPt. Default is AP.
    -n <num>          : run MatProductNumeric() this many times and report average time. Default is 100.

Notes:
  It uses CPU-timer to measure the time.

Examples:
  On OLCF Summit (with GPU-aware MPI)
    # 6 MPI ranks:
    # 6 resource sets (-n 6), 1 MPI rank per RS (-a 1), 7 CPU cores per RS (-c 7), and 1 GPU per RS (-g 1), 6 RSs per node (-r 6)
    jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 ./ex6k -A cage12.aij -mat_type aijcusparse

    # 1 MPI rank
    jsrun --smpiargs "-gpu" -n 1 -a 1 -c 7 -g 1 -r 1 ./ex6k -A cage12.aij -mat_type aijcusparse

  On OLCF Crusher:
    # 1 MPI rank
    # run with 1 node (-N1), 1 mpi rank (-n1), 2 hardware threads per rank (-c2)
    srun -N1 -n1 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos

    # 8 MPI ranks
    srun -N1 -n8 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos
*/
32 #include <petscmat.h>
33 #include <petscdevice.h>
34 
35 #if defined(PETSC_HAVE_CUDA)
36   #include <petscdevice_cuda.h>
37   #define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize())
38 #elif defined(PETSC_HAVE_HIP)
39   #include <petscdevice_hip.h>
40   #define SyncDevice() PetscCallHIP(hipDeviceSynchronize())
41 #elif defined(PETSC_HAVE_KOKKOS)
42   #include <Kokkos_Core.hpp>
43   #define SyncDevice() Kokkos::fence()
44 #else
45   #define SyncDevice()
46 #endif
47 
main(int argc,char ** args)48 int main(int argc, char **args)
49 {
50   Mat            A, P, C;
51   Mat            A2, P2, C2; /* Shadow matrices (of MATAIJ) of A,P,C for initialization and validation */
52   char           matTypeStr[64], prodTypeStr[32];
53   char           fileA[PETSC_MAX_PATH_LEN], fileP[PETSC_MAX_PATH_LEN];
54   PetscViewer    fdA, fdP;
55   PetscBool      flg, flgA, flgP, equal = PETSC_FALSE;
56   PetscLogStage  stage;
57   PetscInt       i, n = 100, nskip = 2, M, N;
58   MatInfo        info;
59   PetscLogDouble tstart = 0, tend = 0, avgTime;
60   PetscMPIInt    size;
61   MatProductType prodType;
62   PetscBool      isAP, isAtP, isAPt, isPtAP, isPAPt;
63 
64   PetscFunctionBeginUser;
65   PetscCall(PetscInitialize(&argc, &args, nullptr, help));
66   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
67 
68   /* Read options -n */
69   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
70 
71   /* Load the matrix from a binary file */
72   PetscCall(PetscOptionsGetString(NULL, NULL, "-A", fileA, PETSC_MAX_PATH_LEN, &flgA));
73   PetscCall(PetscOptionsGetString(NULL, NULL, "-P", fileP, PETSC_MAX_PATH_LEN, &flgP));
74   PetscCheck(flgA, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must give a PETSc matrix binary file with the -A option");
75 
76   PetscCall(PetscOptionsGetString(NULL, NULL, "-mat_type", matTypeStr, sizeof(matTypeStr), &flg));
77   if (!flg) PetscCall(PetscStrncpy(matTypeStr, MATAIJ, sizeof(matTypeStr))); /* Inject the default if not provided */
78 
79   PetscCall(PetscOptionsGetString(NULL, NULL, "-prod_type", prodTypeStr, sizeof(prodTypeStr), &flg));
80   if (!flg) PetscCall(PetscStrncpy(prodTypeStr, "AP", sizeof(prodTypeStr))); /* Inject the default if not provided */
81 
82   PetscCall(PetscStrcmp(prodTypeStr, "AP", &isAP));
83   PetscCall(PetscStrcmp(prodTypeStr, "AtP", &isAtP));
84   PetscCall(PetscStrcmp(prodTypeStr, "APt", &isAPt));
85   PetscCall(PetscStrcmp(prodTypeStr, "PtAP", &isPtAP));
86   PetscCall(PetscStrcmp(prodTypeStr, "PAPt", &isPAPt));
87 
88   if (isAP) prodType = MATPRODUCT_AB;
89   else if (isAtP) prodType = MATPRODUCT_AtB;
90   else if (isAPt) prodType = MATPRODUCT_ABt;
91   else if (isPtAP) prodType = MATPRODUCT_PtAP;
92   else if (isPAPt) prodType = MATPRODUCT_RARt;
93   else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Unsupported product type %s", prodTypeStr);
94 
95   /* Read the matrix file to A2 */
96   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileA, FILE_MODE_READ, &fdA));
97   PetscCall(MatCreate(PETSC_COMM_WORLD, &A2));
98   PetscCall(MatSetType(A2, MATAIJ));
99   PetscCall(MatLoad(A2, fdA));
100   PetscCall(PetscViewerDestroy(&fdA));
101 
102   PetscCall(MatGetSize(A2, &M, &N));
103   PetscCall(MatGetInfo(A2, MAT_GLOBAL_SUM, &info));
104   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix A: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileA, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
105 
106   /* Copy A2 to A and convert A to the specified type */
107   PetscCall(MatDuplicate(A2, MAT_COPY_VALUES, &A));
108   PetscCall(MatConvert(A, matTypeStr, MAT_INPLACE_MATRIX, &A));
109 
110   /* Init P, P2 similarly */
111   if (flgP) { /* If user provided P */
112     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileP, FILE_MODE_READ, &fdP));
113     PetscCall(MatCreate(PETSC_COMM_WORLD, &P2));
114     PetscCall(MatSetType(P2, MATAIJ));
115     PetscCall(MatLoad(P2, fdP));
116     PetscCall(PetscViewerDestroy(&fdP));
117 
118     PetscCall(MatDuplicate(P2, MAT_COPY_VALUES, &P));
119     PetscCall(MatConvert(P, matTypeStr, MAT_INPLACE_MATRIX, &P));
120 
121     PetscCall(MatGetSize(P2, &M, &N));
122     PetscCall(MatGetInfo(P2, MAT_GLOBAL_SUM, &info));
123     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileP, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
124   } else { /* otherwise just let P = A */
125     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P = A\n"));
126     P2 = A2;
127     P  = A;
128   }
129 
130   /* Compute the reference C2 */
131   PetscCall(MatProductCreate(A2, P2, NULL, &C2));
132   PetscCall(MatProductSetType(C2, prodType));
133   PetscCall(MatProductSetFill(C2, PETSC_DEFAULT));
134   PetscCall(MatProductSetFromOptions(C2));
135   PetscCall(MatProductSymbolic(C2));
136   PetscCall(MatProductNumeric(C2));
137   PetscCall(MatGetSize(C2, &M, &N));
138   PetscCall(MatGetInfo(C2, MAT_GLOBAL_SUM, &info));
139   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Mat product  C = %s: %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", prodTypeStr, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
140 
141   /* Compute C */
142   PetscCall(MatProductCreate(A, P, NULL, &C));
143   PetscCall(MatProductSetType(C, prodType));
144   PetscCall(MatProductSetAlgorithm(C, MATPRODUCTALGORITHMBACKEND));
145   PetscCall(MatProductSetFill(C, PETSC_DEFAULT));
146   PetscCall(MatProductSetFromOptions(C));
147 
148   /* Measure  MatProductSymbolic */
149   PetscCall(PetscLogStageRegister("MatProductSymbolic", &stage));
150   PetscCall(PetscLogStagePush(stage));
151   SyncDevice();
152   PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
153   PetscCall(PetscTime(&tstart));
154   PetscCall(MatProductSymbolic(C));
155   SyncDevice();
156   PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
157   PetscCall(PetscTime(&tend));
158   avgTime = (tend - tstart) * 1e6; /* microseconds */
159   PetscCall(PetscLogStagePop());
160   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nMatProductSymbolic()         time (us) with %d MPI ranks = %8.2f\n", size, avgTime));
161 
162   /* Measure  MatProductNumeric */
163   PetscCall(PetscLogStageRegister("MatProductNumeric", &stage));
164   for (i = 0; i < n + nskip; i++) {
165     if (i == nskip) {
166       SyncDevice();
167       PetscCall(PetscLogStagePush(stage));
168       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
169       PetscCall(PetscTime(&tstart));
170     }
171     PetscCall(MatProductReplaceMats(A, P, NULL, C));
172     PetscCall(MatProductNumeric(C));
173   }
174   SyncDevice();
175   PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
176   PetscCall(PetscTime(&tend));
177   avgTime = (tend - tstart) * 1e6 / n; /* microseconds */
178   PetscCall(PetscLogStagePop());
179 
180   PetscCall(MatMultEqual(C, C2, 8, &equal)); /* Not MatEqual() since C and C2 are not necessarily bitwise equal */
181 
182   PetscCheck(equal, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Matrix production error");
183   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "MatProductNumeric()  average time (us) with %d MPI ranks = %8.2f\n", size, avgTime));
184 
185   PetscCall(MatDestroy(&A));
186   if (flgP) PetscCall(MatDestroy(&P));
187   PetscCall(MatDestroy(&C));
188 
189   PetscCall(MatDestroy(&A2));
190   if (flgP) PetscCall(MatDestroy(&P2));
191   PetscCall(MatDestroy(&C2));
192 
193   PetscCall(PetscFinalize());
194   return 0;
195 }
196 
/*TEST

  testset:
    args: -n 2 -A ${DATAFILESPATH}/matrices/small
    nsize: 1
    filter: grep "DOES_NOT_EXIST"
    output_file: output/empty.out
    requires: datafilespath !complex double !defined(PETSC_USE_64BIT_INDICES) kokkos_kernels

    test:
      suffix: 1
      requires: cuda
      args: -mat_type aijcusparse

    test:
      suffix: 2
      args: -mat_type aijkokkos

    test:
      suffix: 3
      requires: hip
      args: -mat_type aijhipsparse

TEST*/
221