1 static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\
2 Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), MatDiagonalScale(), MatZeroEntries() and MatDuplicate().\n\n";
3
4 #include <petscmat.h>
5
main(int argc,char ** args)6 int main(int argc, char **args)
7 {
8 Mat C;
9 Vec s, u, w, x, y, z;
10 PetscInt i, j, m = 8, n, rstart, rend, vstart, vend;
11 PetscScalar one = 1.0, negone = -1.0, v, alpha = 0.1;
12 PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON;
13 PetscBool flg;
14
15 PetscFunctionBeginUser;
16 PetscCall(PetscInitialize(&argc, &args, NULL, help));
17 PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_COMMON));
18 PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
19 n = m;
20 PetscCall(PetscOptionsHasName(NULL, NULL, "-rectA", &flg));
21 if (flg) n += 2;
22 PetscCall(PetscOptionsHasName(NULL, NULL, "-rectB", &flg));
23 if (flg) n -= 2;
24
25 /* ---------- Assemble matrix and vectors ----------- */
26
27 PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
28 PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m, n));
29 PetscCall(MatSetFromOptions(C));
30 PetscCall(MatSetUp(C));
31 PetscCall(MatGetOwnershipRange(C, &rstart, &rend));
32 PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
33 PetscCall(VecSetSizes(x, PETSC_DECIDE, m));
34 PetscCall(VecSetFromOptions(x));
35 PetscCall(VecDuplicate(x, &z));
36 PetscCall(VecDuplicate(x, &w));
37 PetscCall(VecCreate(PETSC_COMM_WORLD, &y));
38 PetscCall(VecSetSizes(y, PETSC_DECIDE, n));
39 PetscCall(VecSetFromOptions(y));
40 PetscCall(VecDuplicate(y, &u));
41 PetscCall(VecDuplicate(y, &s));
42 PetscCall(VecGetOwnershipRange(y, &vstart, &vend));
43
44 /* Assembly */
45 for (i = rstart; i < rend; i++) {
46 v = 100 * (i + 1);
47 PetscCall(VecSetValues(z, 1, &i, &v, INSERT_VALUES));
48 for (j = 0; j < n; j++) {
49 v = 10 * (i + 1) + j + 1;
50 PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
51 }
52 }
53
54 /* Flush off proc Vec values and do more assembly */
55 PetscCall(VecAssemblyBegin(z));
56 for (i = vstart; i < vend; i++) {
57 v = one * ((PetscReal)i);
58 PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
59 v = 100.0 * i;
60 PetscCall(VecSetValues(u, 1, &i, &v, INSERT_VALUES));
61 }
62
63 /* Flush off proc Mat values and do more assembly */
64 PetscCall(MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY));
65 for (i = rstart; i < rend; i++) {
66 for (j = 0; j < n; j++) {
67 v = 10 * (i + 1) + j + 1;
68 PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
69 }
70 }
71 /* Try overlap Coomunication with the next stage XXXSetValues */
72 PetscCall(VecAssemblyEnd(z));
73
74 PetscCall(MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY));
75 CHKMEMQ;
76 /* The Assembly for the second Stage */
77 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
78 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
79 PetscCall(VecAssemblyBegin(y));
80 PetscCall(VecAssemblyEnd(y));
81 PetscCall(MatScale(C, alpha));
82 PetscCall(VecAssemblyBegin(u));
83 PetscCall(VecAssemblyEnd(u));
84 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMult()\n"));
85 CHKMEMQ;
86 PetscCall(MatMult(C, y, x));
87 CHKMEMQ;
88 PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
89 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultAdd()\n"));
90 PetscCall(MatMultAdd(C, y, z, w));
91 PetscCall(VecAXPY(x, one, z));
92 PetscCall(VecAXPY(x, negone, w));
93 PetscCall(VecNorm(x, NORM_2, &norm));
94 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));
95
96 /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */
97
98 for (i = rstart; i < rend; i++) {
99 v = one * ((PetscReal)i);
100 PetscCall(VecSetValues(x, 1, &i, &v, INSERT_VALUES));
101 }
102 PetscCall(VecAssemblyBegin(x));
103 PetscCall(VecAssemblyEnd(x));
104 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTranspose()\n"));
105 PetscCall(MatMultTranspose(C, x, y));
106 PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
107
108 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTransposeAdd()\n"));
109 PetscCall(MatMultTransposeAdd(C, x, u, s));
110 PetscCall(VecAXPY(y, one, u));
111 PetscCall(VecAXPY(y, negone, s));
112 PetscCall(VecNorm(y, NORM_2, &norm));
113 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));
114
115 /* -------------------- Test MatGetDiagonal() ------------------ */
116
117 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatGetDiagonal(), MatDiagonalScale()\n"));
118 PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
119 PetscCall(VecSet(x, one));
120 PetscCall(MatGetDiagonal(C, x));
121 PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
122 for (i = vstart; i < vend; i++) {
123 v = one * ((PetscReal)(i + 1));
124 PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
125 }
126
127 /* -------------------- Test () MatDiagonalScale ------------------ */
128 PetscCall(PetscOptionsHasName(NULL, NULL, "-test_diagonalscale", &flg));
129 if (flg) {
130 PetscCall(MatDiagonalScale(C, x, y));
131 PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
132 }
133 /* -------------------- Test () MatZeroEntries() and MatDuplicate() ------------------ */
134 PetscCall(PetscOptionsHasName(NULL, NULL, "-test_zeroentries", &flg));
135 if (flg) {
136 Mat D;
137 PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &D));
138 PetscCall(MatZeroEntries(D));
139 PetscCall(MatView(D, PETSC_VIEWER_STDOUT_WORLD));
140 PetscCall(MatDestroy(&D));
141 }
142 /* Free data structures */
143 PetscCall(VecDestroy(&u));
144 PetscCall(VecDestroy(&s));
145 PetscCall(VecDestroy(&w));
146 PetscCall(VecDestroy(&x));
147 PetscCall(VecDestroy(&y));
148 PetscCall(VecDestroy(&z));
149 PetscCall(MatDestroy(&C));
150
151 PetscCall(PetscFinalize());
152 return 0;
153 }
154
155 /*TEST
156
157 test:
158 suffix: 11_A
159 args: -mat_type seqaij -rectA
160 filter: grep -v type
161
162 test:
163 args: -mat_type seqdense -rectA
164 suffix: 12_A
165
166 test:
167 args: -mat_type seqaij -rectB
168 suffix: 11_B
169 filter: grep -v type
170
171 test:
172 args: -mat_type seqdense -rectB
173 suffix: 12_B
174
175 test:
176 suffix: 21
177 args: -mat_type mpiaij
178 filter: grep -v type
179
180 test:
181 suffix: 22
182 args: -mat_type mpidense
183
184 test:
185 suffix: 23
186 nsize: 3
187 args: -mat_type mpiaij
188 filter: grep -v type
189
190 test:
191 suffix: 24
192 nsize: 3
193 args: -mat_type mpidense
194
195 test:
196 suffix: 2_aijcusparse_1
197 args: -mat_type mpiaijcusparse -vec_type cuda
198 filter: grep -v type
199 output_file: output/ex5_21.out
200 requires: cuda
201
202 test:
203 nsize: 3
204 suffix: 2_aijcusparse_2
205 filter: grep -v type
206 args: -mat_type mpiaijcusparse -vec_type cuda
207 args: -sf_type {{basic neighbor}}
208 output_file: output/ex5_23.out
209 requires: cuda
210
211 test:
212 nsize: 3
213 suffix: 2_aijcusparse_3
214 filter: grep -v type
215 args: -mat_type mpiaijcusparse -vec_type cuda
216 args: -sf_type {{basic neighbor}}
217 output_file: output/ex5_23.out
218 requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)
219
220 test:
221 suffix: 31
222 args: -mat_type mpiaij -test_diagonalscale
223 filter: grep -v type
224
225 test:
226 suffix: 32
227 args: -mat_type mpibaij -test_diagonalscale
228
229 test:
230 suffix: 33
231 nsize: 3
232 args: -mat_type mpiaij -test_diagonalscale
233 filter: grep -v type
234
235 test:
236 suffix: 34
237 nsize: 3
238 args: -mat_type mpibaij -test_diagonalscale
239
240 test:
241 suffix: 3_aijcusparse_1
242 args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
243 filter: grep -v type
244 output_file: output/ex5_31.out
245 requires: cuda
246
247 test:
248 suffix: 3_aijcusparse_2
249 nsize: 3
250 args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
251 filter: grep -v type
252 output_file: output/ex5_33.out
253 requires: cuda
254
255 test:
256 suffix: 3_kokkos
257 nsize: 3
258 args: -mat_type mpiaijkokkos -vec_type kokkos -test_diagonalscale
259 filter: grep -v type
260 output_file: output/ex5_33.out
261 requires: kokkos_kernels
262
263 test:
264 suffix: aijcusparse_1
265 args: -mat_type seqaijcusparse -vec_type cuda -rectA
266 filter: grep -v type
267 output_file: output/ex5_11_A.out
268 requires: cuda
269
270 test:
271 suffix: aijcusparse_2
272 args: -mat_type seqaijcusparse -vec_type cuda -rectB
273 filter: grep -v type
274 output_file: output/ex5_11_B.out
275 requires: cuda
276
277 test:
278 suffix: sell_1
279 args: -mat_type sell -mat_sell_slice_height 8
280 output_file: output/ex5_41.out
281
282 test:
283 suffix: sell_2
284 nsize: 3
285 args: -mat_type sell -mat_sell_slice_height 8
286 output_file: output/ex5_43.out
287
288 test:
289 suffix: sell_3
290 args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
291 output_file: output/ex5_51.out
292
293 test:
294 suffix: sell_4
295 nsize: 3
296 args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
297 output_file: output/ex5_53.out
298
299 test:
300 suffix: sell_5
301 nsize: 3
302 args: -mat_type sellcuda -vec_type cuda -test_diagonalscale -test_zeroentries
303 output_file: output/ex5_55.out
304 requires: cuda !complex
305
306 test:
307 suffix: sell_6
308 nsize: 3
309 args: -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{1 2 3 4 5 6}}
310 output_file: output/ex5_56.out
311 requires: cuda !complex
312
313 test:
314 suffix: sell_7
315 args: -m 32 -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{0 7 9}} -mat_sell_spmv_cuda_blocky {{2 4 8 16 32}}
316 output_file: output/ex5_57.out
317 requires: cuda !complex !single
318
319 test:
320 suffix: sell_8
321 nsize: 3
322 args: -mat_type sellhip -vec_type hip -test_diagonalscale -test_zeroentries
323 filter: sed -e "s/hip/cuda/g"
324 output_file: output/ex5_55.out
325 requires: hip !complex
326
327 test:
328 suffix: sell_9
329 nsize: 3
330 args: -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{1 2 3 4 5 6}}
331 filter: sed -e "s/hip/cuda/g"
332 output_file: output/ex5_56.out
333 requires: hip !complex
334
335 test:
336 suffix: sell_10
337 args: -m 32 -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{0 7 9}} -mat_sell_spmv_hip_blocky {{2 4 8 16 32}}
338 filter: sed -e "s/hip/cuda/g"
339 output_file: output/ex5_57.out
340 requires: hip !complex !single
341 TEST*/
342