/* Usage/help text for this test program; handed to PetscInitialize() in main(). */
static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\
Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), MatDiagonalScale(), MatZeroEntries() and MatDuplicate().\n\n";
3c4762a1bSJed Brown
4c4762a1bSJed Brown #include <petscmat.h>
5c4762a1bSJed Brown
main(int argc,char ** args)6d71ae5a4SJacob Faibussowitsch int main(int argc, char **args)
7d71ae5a4SJacob Faibussowitsch {
8c4762a1bSJed Brown Mat C;
9c4762a1bSJed Brown Vec s, u, w, x, y, z;
10c4762a1bSJed Brown PetscInt i, j, m = 8, n, rstart, rend, vstart, vend;
11c4762a1bSJed Brown PetscScalar one = 1.0, negone = -1.0, v, alpha = 0.1;
12c4762a1bSJed Brown PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON;
13c4762a1bSJed Brown PetscBool flg;
14c4762a1bSJed Brown
15327415f7SBarry Smith PetscFunctionBeginUser;
16*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &args, NULL, help));
179566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_COMMON));
189566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
19c4762a1bSJed Brown n = m;
209566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-rectA", &flg));
21c4762a1bSJed Brown if (flg) n += 2;
229566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-rectB", &flg));
23c4762a1bSJed Brown if (flg) n -= 2;
24c4762a1bSJed Brown
25c4762a1bSJed Brown /* ---------- Assemble matrix and vectors ----------- */
26c4762a1bSJed Brown
279566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
289566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m, n));
299566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(C));
309566063dSJacob Faibussowitsch PetscCall(MatSetUp(C));
319566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(C, &rstart, &rend));
329566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
339566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, m));
349566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(x));
359566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x, &z));
369566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x, &w));
379566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &y));
389566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, n));
399566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(y));
409566063dSJacob Faibussowitsch PetscCall(VecDuplicate(y, &u));
419566063dSJacob Faibussowitsch PetscCall(VecDuplicate(y, &s));
429566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &vstart, &vend));
43c4762a1bSJed Brown
44c4762a1bSJed Brown /* Assembly */
45c4762a1bSJed Brown for (i = rstart; i < rend; i++) {
46c4762a1bSJed Brown v = 100 * (i + 1);
479566063dSJacob Faibussowitsch PetscCall(VecSetValues(z, 1, &i, &v, INSERT_VALUES));
48c4762a1bSJed Brown for (j = 0; j < n; j++) {
49c4762a1bSJed Brown v = 10 * (i + 1) + j + 1;
509566063dSJacob Faibussowitsch PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
51c4762a1bSJed Brown }
52c4762a1bSJed Brown }
53c4762a1bSJed Brown
54c4762a1bSJed Brown /* Flush off proc Vec values and do more assembly */
559566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(z));
56c4762a1bSJed Brown for (i = vstart; i < vend; i++) {
57c4762a1bSJed Brown v = one * ((PetscReal)i);
589566063dSJacob Faibussowitsch PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
59c4762a1bSJed Brown v = 100.0 * i;
609566063dSJacob Faibussowitsch PetscCall(VecSetValues(u, 1, &i, &v, INSERT_VALUES));
61c4762a1bSJed Brown }
62c4762a1bSJed Brown
63c4762a1bSJed Brown /* Flush off proc Mat values and do more assembly */
649566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY));
65c4762a1bSJed Brown for (i = rstart; i < rend; i++) {
66c4762a1bSJed Brown for (j = 0; j < n; j++) {
67c4762a1bSJed Brown v = 10 * (i + 1) + j + 1;
689566063dSJacob Faibussowitsch PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
69c4762a1bSJed Brown }
70c4762a1bSJed Brown }
71c4762a1bSJed Brown /* Try overlap Coomunication with the next stage XXXSetValues */
729566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(z));
73c4762a1bSJed Brown
749566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY));
75c4762a1bSJed Brown CHKMEMQ;
76c4762a1bSJed Brown /* The Assembly for the second Stage */
779566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
789566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
799566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(y));
809566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(y));
819566063dSJacob Faibussowitsch PetscCall(MatScale(C, alpha));
829566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(u));
839566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(u));
849566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMult()\n"));
85c4762a1bSJed Brown CHKMEMQ;
869566063dSJacob Faibussowitsch PetscCall(MatMult(C, y, x));
87c4762a1bSJed Brown CHKMEMQ;
889566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
899566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultAdd()\n"));
909566063dSJacob Faibussowitsch PetscCall(MatMultAdd(C, y, z, w));
919566063dSJacob Faibussowitsch PetscCall(VecAXPY(x, one, z));
929566063dSJacob Faibussowitsch PetscCall(VecAXPY(x, negone, w));
939566063dSJacob Faibussowitsch PetscCall(VecNorm(x, NORM_2, &norm));
9448a46eb9SPierre Jolivet if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));
95c4762a1bSJed Brown
96c4762a1bSJed Brown /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */
97c4762a1bSJed Brown
98c4762a1bSJed Brown for (i = rstart; i < rend; i++) {
99c4762a1bSJed Brown v = one * ((PetscReal)i);
1009566063dSJacob Faibussowitsch PetscCall(VecSetValues(x, 1, &i, &v, INSERT_VALUES));
101c4762a1bSJed Brown }
1029566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x));
1039566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x));
1049566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTranspose()\n"));
1059566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(C, x, y));
1069566063dSJacob Faibussowitsch PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
107c4762a1bSJed Brown
1089566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTransposeAdd()\n"));
1099566063dSJacob Faibussowitsch PetscCall(MatMultTransposeAdd(C, x, u, s));
1109566063dSJacob Faibussowitsch PetscCall(VecAXPY(y, one, u));
1119566063dSJacob Faibussowitsch PetscCall(VecAXPY(y, negone, s));
1129566063dSJacob Faibussowitsch PetscCall(VecNorm(y, NORM_2, &norm));
11348a46eb9SPierre Jolivet if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));
114c4762a1bSJed Brown
115c4762a1bSJed Brown /* -------------------- Test MatGetDiagonal() ------------------ */
116c4762a1bSJed Brown
1179566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatGetDiagonal(), MatDiagonalScale()\n"));
1189566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
1199566063dSJacob Faibussowitsch PetscCall(VecSet(x, one));
1209566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(C, x));
1219566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
122c4762a1bSJed Brown for (i = vstart; i < vend; i++) {
123c4762a1bSJed Brown v = one * ((PetscReal)(i + 1));
1249566063dSJacob Faibussowitsch PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
125c4762a1bSJed Brown }
126c4762a1bSJed Brown
127c4762a1bSJed Brown /* -------------------- Test () MatDiagonalScale ------------------ */
1289566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-test_diagonalscale", &flg));
129c4762a1bSJed Brown if (flg) {
1309566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(C, x, y));
1319566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
132c4762a1bSJed Brown }
1335f9962eeSHong Zhang /* -------------------- Test () MatZeroEntries() and MatDuplicate() ------------------ */
1345f9962eeSHong Zhang PetscCall(PetscOptionsHasName(NULL, NULL, "-test_zeroentries", &flg));
1355f9962eeSHong Zhang if (flg) {
1365f9962eeSHong Zhang Mat D;
1375f9962eeSHong Zhang PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &D));
1385f9962eeSHong Zhang PetscCall(MatZeroEntries(D));
1395f9962eeSHong Zhang PetscCall(MatView(D, PETSC_VIEWER_STDOUT_WORLD));
1405f9962eeSHong Zhang PetscCall(MatDestroy(&D));
1415f9962eeSHong Zhang }
142c4762a1bSJed Brown /* Free data structures */
1439371c9d4SSatish Balay PetscCall(VecDestroy(&u));
1449371c9d4SSatish Balay PetscCall(VecDestroy(&s));
1459371c9d4SSatish Balay PetscCall(VecDestroy(&w));
1469371c9d4SSatish Balay PetscCall(VecDestroy(&x));
1479371c9d4SSatish Balay PetscCall(VecDestroy(&y));
1489371c9d4SSatish Balay PetscCall(VecDestroy(&z));
1499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C));
150c4762a1bSJed Brown
1519566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
152b122ec5aSJacob Faibussowitsch return 0;
153c4762a1bSJed Brown }
154c4762a1bSJed Brown
155c4762a1bSJed Brown /*TEST
156c4762a1bSJed Brown
157c4762a1bSJed Brown test:
158c4762a1bSJed Brown suffix: 11_A
159c4762a1bSJed Brown args: -mat_type seqaij -rectA
160c4762a1bSJed Brown filter: grep -v type
161c4762a1bSJed Brown
162c4762a1bSJed Brown test:
163c4762a1bSJed Brown args: -mat_type seqdense -rectA
164c4762a1bSJed Brown suffix: 12_A
165c4762a1bSJed Brown
166c4762a1bSJed Brown test:
167c4762a1bSJed Brown args: -mat_type seqaij -rectB
168c4762a1bSJed Brown suffix: 11_B
169c4762a1bSJed Brown filter: grep -v type
170c4762a1bSJed Brown
171c4762a1bSJed Brown test:
172c4762a1bSJed Brown args: -mat_type seqdense -rectB
173c4762a1bSJed Brown suffix: 12_B
174c4762a1bSJed Brown
175c4762a1bSJed Brown test:
176c4762a1bSJed Brown suffix: 21
177c4762a1bSJed Brown args: -mat_type mpiaij
178c4762a1bSJed Brown filter: grep -v type
179c4762a1bSJed Brown
180c4762a1bSJed Brown test:
181c4762a1bSJed Brown suffix: 22
182c4762a1bSJed Brown args: -mat_type mpidense
183c4762a1bSJed Brown
184c4762a1bSJed Brown test:
185c4762a1bSJed Brown suffix: 23
186c4762a1bSJed Brown nsize: 3
187c4762a1bSJed Brown args: -mat_type mpiaij
188c4762a1bSJed Brown filter: grep -v type
189c4762a1bSJed Brown
190c4762a1bSJed Brown test:
191c4762a1bSJed Brown suffix: 24
192c4762a1bSJed Brown nsize: 3
193c4762a1bSJed Brown args: -mat_type mpidense
194c4762a1bSJed Brown
195c4762a1bSJed Brown test:
196c4762a1bSJed Brown suffix: 2_aijcusparse_1
197c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda
198c4762a1bSJed Brown filter: grep -v type
199c4762a1bSJed Brown output_file: output/ex5_21.out
200c4762a1bSJed Brown requires: cuda
201c4762a1bSJed Brown
202c4762a1bSJed Brown test:
203c4762a1bSJed Brown nsize: 3
204c4762a1bSJed Brown suffix: 2_aijcusparse_2
205c4762a1bSJed Brown filter: grep -v type
206c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda
207bd46da1dSJunchao Zhang args: -sf_type {{basic neighbor}}
208c4762a1bSJed Brown output_file: output/ex5_23.out
209c4762a1bSJed Brown requires: cuda
210c4762a1bSJed Brown
211c4762a1bSJed Brown test:
212c4762a1bSJed Brown nsize: 3
213c4762a1bSJed Brown suffix: 2_aijcusparse_3
214c4762a1bSJed Brown filter: grep -v type
215c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda
216c20d7725SJed Brown args: -sf_type {{basic neighbor}}
217c4762a1bSJed Brown output_file: output/ex5_23.out
218dfd57a17SPierre Jolivet requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)
219c4762a1bSJed Brown
220c4762a1bSJed Brown test:
221c4762a1bSJed Brown suffix: 31
222c4762a1bSJed Brown args: -mat_type mpiaij -test_diagonalscale
223c4762a1bSJed Brown filter: grep -v type
224c4762a1bSJed Brown
225c4762a1bSJed Brown test:
226c4762a1bSJed Brown suffix: 32
227c4762a1bSJed Brown args: -mat_type mpibaij -test_diagonalscale
228c4762a1bSJed Brown
229c4762a1bSJed Brown test:
230c4762a1bSJed Brown suffix: 33
231c4762a1bSJed Brown nsize: 3
232c4762a1bSJed Brown args: -mat_type mpiaij -test_diagonalscale
233c4762a1bSJed Brown filter: grep -v type
234c4762a1bSJed Brown
235c4762a1bSJed Brown test:
236c4762a1bSJed Brown suffix: 34
237c4762a1bSJed Brown nsize: 3
238c4762a1bSJed Brown args: -mat_type mpibaij -test_diagonalscale
239c4762a1bSJed Brown
240c4762a1bSJed Brown test:
241c4762a1bSJed Brown suffix: 3_aijcusparse_1
242c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
243c4762a1bSJed Brown filter: grep -v type
244c4762a1bSJed Brown output_file: output/ex5_31.out
245c4762a1bSJed Brown requires: cuda
246c4762a1bSJed Brown
247c4762a1bSJed Brown test:
248c4762a1bSJed Brown suffix: 3_aijcusparse_2
249c4762a1bSJed Brown nsize: 3
250c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
251c4762a1bSJed Brown filter: grep -v type
252c4762a1bSJed Brown output_file: output/ex5_33.out
253c4762a1bSJed Brown requires: cuda
254c4762a1bSJed Brown
255c4762a1bSJed Brown test:
25635990778SJunchao Zhang suffix: 3_kokkos
25735990778SJunchao Zhang nsize: 3
25835990778SJunchao Zhang args: -mat_type mpiaijkokkos -vec_type kokkos -test_diagonalscale
25935990778SJunchao Zhang filter: grep -v type
26035990778SJunchao Zhang output_file: output/ex5_33.out
261dcfd994dSJunchao Zhang requires: kokkos_kernels
26235990778SJunchao Zhang
26335990778SJunchao Zhang test:
264c4762a1bSJed Brown suffix: aijcusparse_1
265c4762a1bSJed Brown args: -mat_type seqaijcusparse -vec_type cuda -rectA
266c4762a1bSJed Brown filter: grep -v type
267c4762a1bSJed Brown output_file: output/ex5_11_A.out
268c4762a1bSJed Brown requires: cuda
269c4762a1bSJed Brown
270c4762a1bSJed Brown test:
271c4762a1bSJed Brown suffix: aijcusparse_2
272c4762a1bSJed Brown args: -mat_type seqaijcusparse -vec_type cuda -rectB
273c4762a1bSJed Brown filter: grep -v type
274c4762a1bSJed Brown output_file: output/ex5_11_B.out
275c4762a1bSJed Brown requires: cuda
276c4762a1bSJed Brown
277c4762a1bSJed Brown test:
278c4762a1bSJed Brown suffix: sell_1
2795f9962eeSHong Zhang args: -mat_type sell -mat_sell_slice_height 8
280c4762a1bSJed Brown output_file: output/ex5_41.out
281c4762a1bSJed Brown
282c4762a1bSJed Brown test:
283c4762a1bSJed Brown suffix: sell_2
284c4762a1bSJed Brown nsize: 3
2855f9962eeSHong Zhang args: -mat_type sell -mat_sell_slice_height 8
286c4762a1bSJed Brown output_file: output/ex5_43.out
287c4762a1bSJed Brown
288c4762a1bSJed Brown test:
289c4762a1bSJed Brown suffix: sell_3
2905f9962eeSHong Zhang args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
291c4762a1bSJed Brown output_file: output/ex5_51.out
292c4762a1bSJed Brown
293c4762a1bSJed Brown test:
294c4762a1bSJed Brown suffix: sell_4
295c4762a1bSJed Brown nsize: 3
2965f9962eeSHong Zhang args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
297c4762a1bSJed Brown output_file: output/ex5_53.out
298c4762a1bSJed Brown
2992d1451d4SHong Zhang test:
3002d1451d4SHong Zhang suffix: sell_5
30190d2215bSHong Zhang nsize: 3
3025f9962eeSHong Zhang args: -mat_type sellcuda -vec_type cuda -test_diagonalscale -test_zeroentries
30390d2215bSHong Zhang output_file: output/ex5_55.out
3048711c661SHong Zhang requires: cuda !complex
3052d1451d4SHong Zhang
3065f9962eeSHong Zhang test:
3075f9962eeSHong Zhang suffix: sell_6
3085f9962eeSHong Zhang nsize: 3
3095f9962eeSHong Zhang args: -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{1 2 3 4 5 6}}
3105f9962eeSHong Zhang output_file: output/ex5_56.out
3115f9962eeSHong Zhang requires: cuda !complex
3125f9962eeSHong Zhang
3135f9962eeSHong Zhang test:
3145f9962eeSHong Zhang suffix: sell_7
3155f9962eeSHong Zhang args: -m 32 -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{0 7 9}} -mat_sell_spmv_cuda_blocky {{2 4 8 16 32}}
3165f9962eeSHong Zhang output_file: output/ex5_57.out
3175f9962eeSHong Zhang requires: cuda !complex !single
318773bf0f6SHong Zhang
319773bf0f6SHong Zhang test:
320773bf0f6SHong Zhang suffix: sell_8
321773bf0f6SHong Zhang nsize: 3
322773bf0f6SHong Zhang args: -mat_type sellhip -vec_type hip -test_diagonalscale -test_zeroentries
323773bf0f6SHong Zhang filter: sed -e "s/hip/cuda/g"
324773bf0f6SHong Zhang output_file: output/ex5_55.out
325773bf0f6SHong Zhang requires: hip !complex
326773bf0f6SHong Zhang
327773bf0f6SHong Zhang test:
328773bf0f6SHong Zhang suffix: sell_9
329773bf0f6SHong Zhang nsize: 3
330773bf0f6SHong Zhang args: -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{1 2 3 4 5 6}}
331773bf0f6SHong Zhang filter: sed -e "s/hip/cuda/g"
332773bf0f6SHong Zhang output_file: output/ex5_56.out
333773bf0f6SHong Zhang requires: hip !complex
334773bf0f6SHong Zhang
335773bf0f6SHong Zhang test:
336773bf0f6SHong Zhang suffix: sell_10
337773bf0f6SHong Zhang args: -m 32 -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{0 7 9}} -mat_sell_spmv_hip_blocky {{2 4 8 16 32}}
338773bf0f6SHong Zhang filter: sed -e "s/hip/cuda/g"
339773bf0f6SHong Zhang output_file: output/ex5_57.out
340773bf0f6SHong Zhang requires: hip !complex !single
341c4762a1bSJed Brown TEST*/
342