1a90d8e38SSatish Balay static char help[] = "Benchmarking MatProduct with AIJ and its subclass matrix types\n";
2a90d8e38SSatish Balay
3a90d8e38SSatish Balay /*
4a90d8e38SSatish Balay Usage:
5*a8cf87e0SJunchao Zhang mpiexec -n <np> ./ex6k
6a90d8e38SSatish Balay -A <filename> : input PETSc binary file for matrix A; one can convert a file from MatrixMarket using mat/tests/ex72.c
7a90d8e38SSatish Balay -P <filename> : input PETSc binary file for matrix P; optional, if not given, P = A
8a90d8e38SSatish Balay -mat_type <str> : aij or its subclass. Default is aij.
9a90d8e38SSatish Balay -prod_type <str> : AP, AtP, APt, PtAP or PAPt. Default is AP.
10a90d8e38SSatish Balay -n <num> : run MatProductNumeric() this many times and report average time. Default is 100.
11a90d8e38SSatish Balay
12a90d8e38SSatish Balay Notes:
13a90d8e38SSatish Balay It uses CPU-timer to measure the time.
14a90d8e38SSatish Balay
15a90d8e38SSatish Balay Examples:
16a90d8e38SSatish Balay On OLCF Summit (with GPU-aware MPI)
17a90d8e38SSatish Balay # 6 MPI ranks:
18a90d8e38SSatish Balay # 6 resource sets (-n 6), 1 MPI rank per RS (-a 1), 7 CPU cores per RS (-c 7), and 1 GPU per RS (-g 1), 6 RSs per node (-r 6)
19a90d8e38SSatish Balay jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 ./ex6k -A cage12.aij -mat_type aijcusparse
20a90d8e38SSatish Balay
21a90d8e38SSatish Balay # 1 MPI rank
22a90d8e38SSatish Balay jsrun --smpiargs "-gpu" -n 1 -a 1 -c 7 -g 1 -r 1 ./ex6k -A cage12.aij -mat_type aijcusparse
23a90d8e38SSatish Balay
24a90d8e38SSatish Balay On OLCF Crusher:
25a90d8e38SSatish Balay # 1 MPI rank
26a90d8e38SSatish Balay # run with 1 node (-N1), 1 mpi rank (-n1), 2 hardware threads per rank (-c2)
27a90d8e38SSatish Balay srun -N1 -n1 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos
28a90d8e38SSatish Balay
29a90d8e38SSatish Balay # 8 MPI ranks
30a90d8e38SSatish Balay srun -N1 -n8 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos
31a90d8e38SSatish Balay */
32a90d8e38SSatish Balay #include <petscmat.h>
33a90d8e38SSatish Balay #include <petscdevice.h>
34a90d8e38SSatish Balay
35a90d8e38SSatish Balay #if defined(PETSC_HAVE_CUDA)
36a90d8e38SSatish Balay #include <petscdevice_cuda.h>
37a90d8e38SSatish Balay #define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize())
38a90d8e38SSatish Balay #elif defined(PETSC_HAVE_HIP)
39a90d8e38SSatish Balay #include <petscdevice_hip.h>
40a90d8e38SSatish Balay #define SyncDevice() PetscCallHIP(hipDeviceSynchronize())
41a90d8e38SSatish Balay #elif defined(PETSC_HAVE_KOKKOS)
42a90d8e38SSatish Balay #include <Kokkos_Core.hpp>
43a90d8e38SSatish Balay #define SyncDevice() Kokkos::fence()
44a90d8e38SSatish Balay #else
45a90d8e38SSatish Balay #define SyncDevice()
46a90d8e38SSatish Balay #endif
47a90d8e38SSatish Balay
main(int argc,char ** args)48a90d8e38SSatish Balay int main(int argc, char **args)
49a90d8e38SSatish Balay {
50a90d8e38SSatish Balay Mat A, P, C;
51a90d8e38SSatish Balay Mat A2, P2, C2; /* Shadow matrices (of MATAIJ) of A,P,C for initialization and validation */
52a90d8e38SSatish Balay char matTypeStr[64], prodTypeStr[32];
53a90d8e38SSatish Balay char fileA[PETSC_MAX_PATH_LEN], fileP[PETSC_MAX_PATH_LEN];
54a90d8e38SSatish Balay PetscViewer fdA, fdP;
55a90d8e38SSatish Balay PetscBool flg, flgA, flgP, equal = PETSC_FALSE;
56a90d8e38SSatish Balay PetscLogStage stage;
57a90d8e38SSatish Balay PetscInt i, n = 100, nskip = 2, M, N;
58a90d8e38SSatish Balay MatInfo info;
59a90d8e38SSatish Balay PetscLogDouble tstart = 0, tend = 0, avgTime;
60a90d8e38SSatish Balay PetscMPIInt size;
61a90d8e38SSatish Balay MatProductType prodType;
62a90d8e38SSatish Balay PetscBool isAP, isAtP, isAPt, isPtAP, isPAPt;
63a90d8e38SSatish Balay
64a90d8e38SSatish Balay PetscFunctionBeginUser;
65a90d8e38SSatish Balay PetscCall(PetscInitialize(&argc, &args, nullptr, help));
66a90d8e38SSatish Balay PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
67a90d8e38SSatish Balay
68a90d8e38SSatish Balay /* Read options -n */
69a90d8e38SSatish Balay PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
70a90d8e38SSatish Balay
71a90d8e38SSatish Balay /* Load the matrix from a binary file */
72a90d8e38SSatish Balay PetscCall(PetscOptionsGetString(NULL, NULL, "-A", fileA, PETSC_MAX_PATH_LEN, &flgA));
73a90d8e38SSatish Balay PetscCall(PetscOptionsGetString(NULL, NULL, "-P", fileP, PETSC_MAX_PATH_LEN, &flgP));
74a90d8e38SSatish Balay PetscCheck(flgA, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must give a PETSc matrix binary file with the -A option");
75a90d8e38SSatish Balay
76a90d8e38SSatish Balay PetscCall(PetscOptionsGetString(NULL, NULL, "-mat_type", matTypeStr, sizeof(matTypeStr), &flg));
77a90d8e38SSatish Balay if (!flg) PetscCall(PetscStrncpy(matTypeStr, MATAIJ, sizeof(matTypeStr))); /* Inject the default if not provided */
78a90d8e38SSatish Balay
79a90d8e38SSatish Balay PetscCall(PetscOptionsGetString(NULL, NULL, "-prod_type", prodTypeStr, sizeof(prodTypeStr), &flg));
80a90d8e38SSatish Balay if (!flg) PetscCall(PetscStrncpy(prodTypeStr, "AP", sizeof(prodTypeStr))); /* Inject the default if not provided */
81a90d8e38SSatish Balay
82a90d8e38SSatish Balay PetscCall(PetscStrcmp(prodTypeStr, "AP", &isAP));
83a90d8e38SSatish Balay PetscCall(PetscStrcmp(prodTypeStr, "AtP", &isAtP));
84a90d8e38SSatish Balay PetscCall(PetscStrcmp(prodTypeStr, "APt", &isAPt));
85a90d8e38SSatish Balay PetscCall(PetscStrcmp(prodTypeStr, "PtAP", &isPtAP));
86a90d8e38SSatish Balay PetscCall(PetscStrcmp(prodTypeStr, "PAPt", &isPAPt));
87a90d8e38SSatish Balay
88a90d8e38SSatish Balay if (isAP) prodType = MATPRODUCT_AB;
89a90d8e38SSatish Balay else if (isAtP) prodType = MATPRODUCT_AtB;
90a90d8e38SSatish Balay else if (isAPt) prodType = MATPRODUCT_ABt;
91a90d8e38SSatish Balay else if (isPtAP) prodType = MATPRODUCT_PtAP;
92a90d8e38SSatish Balay else if (isPAPt) prodType = MATPRODUCT_RARt;
93a90d8e38SSatish Balay else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Unsupported product type %s", prodTypeStr);
94a90d8e38SSatish Balay
95a90d8e38SSatish Balay /* Read the matrix file to A2 */
96a90d8e38SSatish Balay PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileA, FILE_MODE_READ, &fdA));
97a90d8e38SSatish Balay PetscCall(MatCreate(PETSC_COMM_WORLD, &A2));
98a90d8e38SSatish Balay PetscCall(MatSetType(A2, MATAIJ));
99a90d8e38SSatish Balay PetscCall(MatLoad(A2, fdA));
100a90d8e38SSatish Balay PetscCall(PetscViewerDestroy(&fdA));
101a90d8e38SSatish Balay
102a90d8e38SSatish Balay PetscCall(MatGetSize(A2, &M, &N));
103a90d8e38SSatish Balay PetscCall(MatGetInfo(A2, MAT_GLOBAL_SUM, &info));
104a90d8e38SSatish Balay PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix A: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileA, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
105a90d8e38SSatish Balay
106a90d8e38SSatish Balay /* Copy A2 to A and convert A to the specified type */
107a90d8e38SSatish Balay PetscCall(MatDuplicate(A2, MAT_COPY_VALUES, &A));
108a90d8e38SSatish Balay PetscCall(MatConvert(A, matTypeStr, MAT_INPLACE_MATRIX, &A));
109a90d8e38SSatish Balay
110a90d8e38SSatish Balay /* Init P, P2 similarly */
111a90d8e38SSatish Balay if (flgP) { /* If user provided P */
112a90d8e38SSatish Balay PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileP, FILE_MODE_READ, &fdP));
113a90d8e38SSatish Balay PetscCall(MatCreate(PETSC_COMM_WORLD, &P2));
114a90d8e38SSatish Balay PetscCall(MatSetType(P2, MATAIJ));
115a90d8e38SSatish Balay PetscCall(MatLoad(P2, fdP));
116a90d8e38SSatish Balay PetscCall(PetscViewerDestroy(&fdP));
117a90d8e38SSatish Balay
118a90d8e38SSatish Balay PetscCall(MatDuplicate(P2, MAT_COPY_VALUES, &P));
119a90d8e38SSatish Balay PetscCall(MatConvert(P, matTypeStr, MAT_INPLACE_MATRIX, &P));
120a90d8e38SSatish Balay
121a90d8e38SSatish Balay PetscCall(MatGetSize(P2, &M, &N));
122a90d8e38SSatish Balay PetscCall(MatGetInfo(P2, MAT_GLOBAL_SUM, &info));
123a90d8e38SSatish Balay PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileP, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
124a90d8e38SSatish Balay } else { /* otherwise just let P = A */
125a90d8e38SSatish Balay PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P = A\n"));
126a90d8e38SSatish Balay P2 = A2;
127a90d8e38SSatish Balay P = A;
128a90d8e38SSatish Balay }
129a90d8e38SSatish Balay
130a90d8e38SSatish Balay /* Compute the reference C2 */
131a90d8e38SSatish Balay PetscCall(MatProductCreate(A2, P2, NULL, &C2));
132a90d8e38SSatish Balay PetscCall(MatProductSetType(C2, prodType));
133a90d8e38SSatish Balay PetscCall(MatProductSetFill(C2, PETSC_DEFAULT));
134a90d8e38SSatish Balay PetscCall(MatProductSetFromOptions(C2));
135a90d8e38SSatish Balay PetscCall(MatProductSymbolic(C2));
136a90d8e38SSatish Balay PetscCall(MatProductNumeric(C2));
137a90d8e38SSatish Balay PetscCall(MatGetSize(C2, &M, &N));
138a90d8e38SSatish Balay PetscCall(MatGetInfo(C2, MAT_GLOBAL_SUM, &info));
139a90d8e38SSatish Balay PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Mat product C = %s: %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", prodTypeStr, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M));
140a90d8e38SSatish Balay
141a90d8e38SSatish Balay /* Compute C */
142a90d8e38SSatish Balay PetscCall(MatProductCreate(A, P, NULL, &C));
143a90d8e38SSatish Balay PetscCall(MatProductSetType(C, prodType));
144a90d8e38SSatish Balay PetscCall(MatProductSetAlgorithm(C, MATPRODUCTALGORITHMBACKEND));
145a90d8e38SSatish Balay PetscCall(MatProductSetFill(C, PETSC_DEFAULT));
146a90d8e38SSatish Balay PetscCall(MatProductSetFromOptions(C));
147a90d8e38SSatish Balay
148a90d8e38SSatish Balay /* Measure MatProductSymbolic */
149a90d8e38SSatish Balay PetscCall(PetscLogStageRegister("MatProductSymbolic", &stage));
150a90d8e38SSatish Balay PetscCall(PetscLogStagePush(stage));
151a90d8e38SSatish Balay SyncDevice();
152a90d8e38SSatish Balay PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
153a90d8e38SSatish Balay PetscCall(PetscTime(&tstart));
154a90d8e38SSatish Balay PetscCall(MatProductSymbolic(C));
155a90d8e38SSatish Balay SyncDevice();
156a90d8e38SSatish Balay PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
157a90d8e38SSatish Balay PetscCall(PetscTime(&tend));
158a90d8e38SSatish Balay avgTime = (tend - tstart) * 1e6; /* microseconds */
159a90d8e38SSatish Balay PetscCall(PetscLogStagePop());
160a90d8e38SSatish Balay PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nMatProductSymbolic() time (us) with %d MPI ranks = %8.2f\n", size, avgTime));
161a90d8e38SSatish Balay
162a90d8e38SSatish Balay /* Measure MatProductNumeric */
163a90d8e38SSatish Balay PetscCall(PetscLogStageRegister("MatProductNumeric", &stage));
164a90d8e38SSatish Balay for (i = 0; i < n + nskip; i++) {
165a90d8e38SSatish Balay if (i == nskip) {
166a90d8e38SSatish Balay SyncDevice();
167a90d8e38SSatish Balay PetscCall(PetscLogStagePush(stage));
168a90d8e38SSatish Balay PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
169a90d8e38SSatish Balay PetscCall(PetscTime(&tstart));
170a90d8e38SSatish Balay }
171a90d8e38SSatish Balay PetscCall(MatProductReplaceMats(A, P, NULL, C));
172a90d8e38SSatish Balay PetscCall(MatProductNumeric(C));
173a90d8e38SSatish Balay }
174a90d8e38SSatish Balay SyncDevice();
175a90d8e38SSatish Balay PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
176a90d8e38SSatish Balay PetscCall(PetscTime(&tend));
177a90d8e38SSatish Balay avgTime = (tend - tstart) * 1e6 / n; /* microseconds */
178a90d8e38SSatish Balay PetscCall(PetscLogStagePop());
179a90d8e38SSatish Balay
180a90d8e38SSatish Balay PetscCall(MatMultEqual(C, C2, 8, &equal)); /* Not MatEqual() since C and C2 are not necessarily bitwise equal */
181a90d8e38SSatish Balay
182a90d8e38SSatish Balay PetscCheck(equal, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Matrix production error");
183a90d8e38SSatish Balay PetscCall(PetscPrintf(PETSC_COMM_WORLD, "MatProductNumeric() average time (us) with %d MPI ranks = %8.2f\n", size, avgTime));
184a90d8e38SSatish Balay
185a90d8e38SSatish Balay PetscCall(MatDestroy(&A));
186a90d8e38SSatish Balay if (flgP) PetscCall(MatDestroy(&P));
187a90d8e38SSatish Balay PetscCall(MatDestroy(&C));
188a90d8e38SSatish Balay
189a90d8e38SSatish Balay PetscCall(MatDestroy(&A2));
190a90d8e38SSatish Balay if (flgP) PetscCall(MatDestroy(&P2));
191a90d8e38SSatish Balay PetscCall(MatDestroy(&C2));
192a90d8e38SSatish Balay
193a90d8e38SSatish Balay PetscCall(PetscFinalize());
194a90d8e38SSatish Balay return 0;
195a90d8e38SSatish Balay }
196a90d8e38SSatish Balay
197a90d8e38SSatish Balay /*TEST
198a90d8e38SSatish Balay
199a90d8e38SSatish Balay testset:
200a90d8e38SSatish Balay args: -n 2 -A ${DATAFILESPATH}/matrices/small
201a90d8e38SSatish Balay nsize: 1
202a90d8e38SSatish Balay filter: grep "DOES_NOT_EXIST"
203a90d8e38SSatish Balay output_file: output/empty.out
204a90d8e38SSatish Balay requires: datafilespath !complex double !defined(PETSC_USE_64BIT_INDICES) kokkos_kernels
205a90d8e38SSatish Balay
206a90d8e38SSatish Balay test:
207a90d8e38SSatish Balay suffix: 1
208a90d8e38SSatish Balay requires: cuda
209a90d8e38SSatish Balay args: -mat_type aijcusparse
210a90d8e38SSatish Balay
211a90d8e38SSatish Balay test:
212a90d8e38SSatish Balay suffix: 2
213a90d8e38SSatish Balay args: -mat_type aijkokkos
214a90d8e38SSatish Balay
215a90d8e38SSatish Balay test:
216a90d8e38SSatish Balay suffix: 3
217a90d8e38SSatish Balay requires: hip
218a90d8e38SSatish Balay args: -mat_type aijhipsparse
219a90d8e38SSatish Balay
220a90d8e38SSatish Balay TEST*/
221