/* One-line program description that PETSc prints with -help */
static char help[] = "Benchmarking MatProduct with AIJ and its subclass matrix types\n";
2
3 /*
4 Usage:
5 mpiexec -n <np> ./ex6k
6 -A <filename> : input PETSc binary file for matrix A; one can convert a file from MatrixMarket using mat/tests/ex72.c
7 -P <filename> : input PETSc binary file for matrix P; optional, if not given, P = A
8 -mat_type <str> : aij or its subclass. Default is aij.
9 -prod_type <str> : AP, AtP, APt, PtAP or PAPt. Default is AP.
10 -n <num> : run MatProductNumeric() this many times and report average time. Default is 100.
11
12 Notes:
  It uses a CPU timer to measure the time.
14
15 Examples:
16 On OLCF Summit (with GPU-aware MPI)
17 # 6 MPI ranks:
18 # 6 resource sets (-n 6), 1 MPI rank per RS (-a 1), 7 CPU cores per RS (-c 7), and 1 GPU per RS (-g 1), 6 RSs per node (-r 6)
19 jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 ./ex6k -A cage12.aij -mat_type aijcusparse
20
21 # 1 MPI rank
22 jsrun --smpiargs "-gpu" -n 1 -a 1 -c 7 -g 1 -r 1 ./ex6k -A cage12.aij -mat_type aijcusparse
23
24 On OLCF Crusher:
25 # 1 MPI rank
26 # run with 1 node (-N1), 1 mpi rank (-n1), 2 hardware threads per rank (-c2)
27 srun -N1 -n1 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos
28
29 # 8 MPI ranks
30 srun -N1 -n8 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos
31 */
32 #include <petscmat.h>
33 #include <petscdevice.h>
34
/* SyncDevice() blocks the host until all outstanding device work has completed,
   so that PetscTime() measurements bracket the actual GPU execution.
   It expands to a no-op in a host-only build. */
#if defined(PETSC_HAVE_CUDA)
#include <petscdevice_cuda.h>
#define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize())
#elif defined(PETSC_HAVE_HIP)
#include <petscdevice_hip.h>
#define SyncDevice() PetscCallHIP(hipDeviceSynchronize())
#elif defined(PETSC_HAVE_KOKKOS)
#include <Kokkos_Core.hpp>
#define SyncDevice() Kokkos::fence()
#else
#define SyncDevice()
#endif
47
main(int argc,char ** args)48 int main(int argc, char **args)
49 {
50 Mat A, P, C;
51 Mat A2, P2, C2; /* Shadow matrices (of MATAIJ) of A,P,C for initialization and validation */
52 char matTypeStr[64], prodTypeStr[32];
53 char fileA[PETSC_MAX_PATH_LEN], fileP[PETSC_MAX_PATH_LEN];
54 PetscViewer fdA, fdP;
55 PetscBool flg, flgA, flgP, equal = PETSC_FALSE;
56 PetscLogStage stage;
57 PetscInt i, n = 100, nskip = 2, M, N;
58 MatInfo info;
59 PetscLogDouble tstart = 0, tend = 0, avgTime;
60 PetscMPIInt size;
61 MatProductType prodType;
62 PetscBool isAP, isAtP, isAPt, isPtAP, isPAPt;
63
64 PetscFunctionBeginUser;
65 PetscCall(PetscInitialize(&argc, &args, nullptr, help));
66 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
67
68 /* Read options -n */
69 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
70
71 /* Load the matrix from a binary file */
72 PetscCall(PetscOptionsGetString(NULL, NULL, "-A", fileA, PETSC_MAX_PATH_LEN, &flgA));
73 PetscCall(PetscOptionsGetString(NULL, NULL, "-P", fileP, PETSC_MAX_PATH_LEN, &flgP));
74 PetscCheck(flgA, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must give a PETSc matrix binary file with the -A option");
75
76 PetscCall(PetscOptionsGetString(NULL, NULL, "-mat_type", matTypeStr, sizeof(matTypeStr), &flg));
77 if (!flg) PetscCall(PetscStrncpy(matTypeStr, MATAIJ, sizeof(matTypeStr))); /* Inject the default if not provided */
78
79 PetscCall(PetscOptionsGetString(NULL, NULL, "-prod_type", prodTypeStr, sizeof(prodTypeStr), &flg));
80 if (!flg) PetscCall(PetscStrncpy(prodTypeStr, "AP", sizeof(prodTypeStr))); /* Inject the default if not provided */
81
82 PetscCall(PetscStrcmp(prodTypeStr, "AP", &isAP));
83 PetscCall(PetscStrcmp(prodTypeStr, "AtP", &isAtP));
84 PetscCall(PetscStrcmp(prodTypeStr, "APt", &isAPt));
85 PetscCall(PetscStrcmp(prodTypeStr, "PtAP", &isPtAP));
86 PetscCall(PetscStrcmp(prodTypeStr, "PAPt", &isPAPt));
87
88 if (isAP) prodType = MATPRODUCT_AB;
89 else if (isAtP) prodType = MATPRODUCT_AtB;
90 else if (isAPt) prodType = MATPRODUCT_ABt;
91 else if (isPtAP) prodType = MATPRODUCT_PtAP;
92 else if (isPAPt) prodType = MATPRODUCT_RARt;
93 else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Unsupported product type %s", prodTypeStr);
94
95 /* Read the matrix file to A2 */
96 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileA, FILE_MODE_READ, &fdA));
97 PetscCall(MatCreate(PETSC_COMM_WORLD, &A2));
98 PetscCall(MatSetType(A2, MATAIJ));
99 PetscCall(MatLoad(A2, fdA));
100 PetscCall(PetscViewerDestroy(&fdA));
101
102 PetscCall(MatGetSize(A2, &M, &N));
103 PetscCall(MatGetInfo(A2, MAT_GLOBAL_SUM, &info));
104 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix A: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileA, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
105
106 /* Copy A2 to A and convert A to the specified type */
107 PetscCall(MatDuplicate(A2, MAT_COPY_VALUES, &A));
108 PetscCall(MatConvert(A, matTypeStr, MAT_INPLACE_MATRIX, &A));
109
110 /* Init P, P2 similarly */
111 if (flgP) { /* If user provided P */
112 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileP, FILE_MODE_READ, &fdP));
113 PetscCall(MatCreate(PETSC_COMM_WORLD, &P2));
114 PetscCall(MatSetType(P2, MATAIJ));
115 PetscCall(MatLoad(P2, fdP));
116 PetscCall(PetscViewerDestroy(&fdP));
117
118 PetscCall(MatDuplicate(P2, MAT_COPY_VALUES, &P));
119 PetscCall(MatConvert(P, matTypeStr, MAT_INPLACE_MATRIX, &P));
120
121 PetscCall(MatGetSize(P2, &M, &N));
122 PetscCall(MatGetInfo(P2, MAT_GLOBAL_SUM, &info));
123 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileP, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
124 } else { /* otherwise just let P = A */
125 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P = A\n"));
126 P2 = A2;
127 P = A;
128 }
129
130 /* Compute the reference C2 */
131 PetscCall(MatProductCreate(A2, P2, NULL, &C2));
132 PetscCall(MatProductSetType(C2, prodType));
133 PetscCall(MatProductSetFill(C2, PETSC_DEFAULT));
134 PetscCall(MatProductSetFromOptions(C2));
135 PetscCall(MatProductSymbolic(C2));
136 PetscCall(MatProductNumeric(C2));
137 PetscCall(MatGetSize(C2, &M, &N));
138 PetscCall(MatGetInfo(C2, MAT_GLOBAL_SUM, &info));
139 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Mat product C = %s: %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", prodTypeStr, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
140
141 /* Compute C */
142 PetscCall(MatProductCreate(A, P, NULL, &C));
143 PetscCall(MatProductSetType(C, prodType));
144 PetscCall(MatProductSetAlgorithm(C, MATPRODUCTALGORITHMBACKEND));
145 PetscCall(MatProductSetFill(C, PETSC_DEFAULT));
146 PetscCall(MatProductSetFromOptions(C));
147
148 /* Measure MatProductSymbolic */
149 PetscCall(PetscLogStageRegister("MatProductSymbolic", &stage));
150 PetscCall(PetscLogStagePush(stage));
151 SyncDevice();
152 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
153 PetscCall(PetscTime(&tstart));
154 PetscCall(MatProductSymbolic(C));
155 SyncDevice();
156 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
157 PetscCall(PetscTime(&tend));
158 avgTime = (tend - tstart) * 1e6; /* microseconds */
159 PetscCall(PetscLogStagePop());
160 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nMatProductSymbolic() time (us) with %d MPI ranks = %8.2f\n", size, avgTime));
161
162 /* Measure MatProductNumeric */
163 PetscCall(PetscLogStageRegister("MatProductNumeric", &stage));
164 for (i = 0; i < n + nskip; i++) {
165 if (i == nskip) {
166 SyncDevice();
167 PetscCall(PetscLogStagePush(stage));
168 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
169 PetscCall(PetscTime(&tstart));
170 }
171 PetscCall(MatProductReplaceMats(A, P, NULL, C));
172 PetscCall(MatProductNumeric(C));
173 }
174 SyncDevice();
175 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
176 PetscCall(PetscTime(&tend));
177 avgTime = (tend - tstart) * 1e6 / n; /* microseconds */
178 PetscCall(PetscLogStagePop());
179
180 PetscCall(MatMultEqual(C, C2, 8, &equal)); /* Not MatEqual() since C and C2 are not necessarily bitwise equal */
181
182 PetscCheck(equal, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Matrix production error");
183 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "MatProductNumeric() average time (us) with %d MPI ranks = %8.2f\n", size, avgTime));
184
185 PetscCall(MatDestroy(&A));
186 if (flgP) PetscCall(MatDestroy(&P));
187 PetscCall(MatDestroy(&C));
188
189 PetscCall(MatDestroy(&A2));
190 if (flgP) PetscCall(MatDestroy(&P2));
191 PetscCall(MatDestroy(&C2));
192
193 PetscCall(PetscFinalize());
194 return 0;
195 }
196
197 /*TEST
198
199 testset:
200 args: -n 2 -A ${DATAFILESPATH}/matrices/small
201 nsize: 1
202 filter: grep "DOES_NOT_EXIST"
203 output_file: output/empty.out
204 requires: datafilespath !complex double !defined(PETSC_USE_64BIT_INDICES) kokkos_kernels
205
206 test:
207 suffix: 1
208 requires: cuda
209 args: -mat_type aijcusparse
210
211 test:
212 suffix: 2
213 args: -mat_type aijkokkos
214
215 test:
216 suffix: 3
217 requires: hip
218 args: -mat_type aijhipsparse
219
220 TEST*/
221