xref: /petsc/src/mat/tests/ex158.c (revision 732aec7a18f2199fb53bb9a2f3aef439a834ce31)
1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
2 
3 /*
4  Usage:
5    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7 */
8 
9 #include <petscmat.h>
10 #include <fftw3-mpi.h>
11 
main(int argc,char ** args)12 int main(int argc, char **args)
13 {
14   PetscMPIInt rank, size;
15   PetscInt    N0 = 50, N1 = 20, N = N0 * N1;
16   PetscRandom rdm;
17   PetscScalar a;
18   PetscReal   enorm;
19   Vec         x, y, z;
20   PetscBool   view = PETSC_FALSE, use_interface = PETSC_TRUE;
21 
22   PetscFunctionBeginUser;
23   PetscCall(PetscInitialize(&argc, &args, NULL, help));
24 #if defined(PETSC_USE_COMPLEX)
25   SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
26 #endif
27 
28   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");
29   PetscCall(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158", use_interface, &use_interface, NULL));
30   PetscOptionsEnd();
31 
32   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
33   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
34 
35   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
36   PetscCall(PetscRandomSetFromOptions(rdm));
37 
38   if (!use_interface) {
39     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
40     /*---------------------------------------------------------*/
41     fftw_plan     fplan, bplan;
42     fftw_complex *data_in, *data_out, *data_out2;
43     ptrdiff_t     alloc_local, local_n0, local_0_start;
44 
45     if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n");
46     fftw_mpi_init();
47     N           = N0 * N1;
48     alloc_local = fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start);
49 
50     data_in   = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
51     data_out  = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
52     data_out2 = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
53 
54     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_in, &x));
55     PetscCall(PetscObjectSetName((PetscObject)x, "Real Space vector"));
56     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out, &y));
57     PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
58     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out2, &z));
59     PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
60 
61     fplan = fftw_mpi_plan_dft_2d(N0, N1, data_in, data_out, PETSC_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE);
62     bplan = fftw_mpi_plan_dft_2d(N0, N1, data_out, data_out2, PETSC_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
63 
64     PetscCall(VecSetRandom(x, rdm));
65     if (view) PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
66 
67     fftw_execute(fplan);
68     if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
69 
70     fftw_execute(bplan);
71 
72     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
73     a = 1.0 / (PetscReal)N;
74     PetscCall(VecScale(z, a));
75     if (view) PetscCall(VecView(z, PETSC_VIEWER_STDOUT_WORLD));
76     PetscCall(VecAXPY(z, -1.0, x));
77     PetscCall(VecNorm(z, NORM_1, &enorm));
78     if (enorm > 1.e-11) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Error norm of |x - z| %g\n", (double)enorm));
79 
80     /* Free spaces */
81     fftw_destroy_plan(fplan);
82     fftw_destroy_plan(bplan);
83     fftw_free(data_in);
84     PetscCall(VecDestroy(&x));
85     fftw_free(data_out);
86     PetscCall(VecDestroy(&y));
87     fftw_free(data_out2);
88     PetscCall(VecDestroy(&z));
89 
90   } else {
91     /* Use PETSc-FFTW interface                  */
92     /*-------------------------------------------*/
93     PetscInt i, *dim, k, DIM;
94     Mat      A;
95     Vec      input, output;
96 
97     N = 30;
98     for (i = 2; i < 3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
99       DIM = i;
100       PetscCall(PetscMalloc1(i, &dim));
101       for (k = 0; k < i; k++) dim[k] = 30;
102       N *= dim[i - 1];
103 
104       /* Create FFTW object */
105       if (rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Use PETSc-FFTW interface...%d-DIM:%d \n", DIM, N));
106       PetscCall(MatCreateFFT(PETSC_COMM_WORLD, DIM, dim, MATFFTW, &A));
107 
108       /* Create FFTW vectors that are compatible with parallel layout of A */
109       PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));
110       PetscCall(PetscObjectSetName((PetscObject)x, "Real space vector"));
111       PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
112       PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
113 
114       /* Create and set PETSc vector */
115       PetscCall(VecCreate(PETSC_COMM_WORLD, &input));
116       PetscCall(VecSetSizes(input, PETSC_DECIDE, N));
117       PetscCall(VecSetFromOptions(input));
118       PetscCall(VecSetRandom(input, rdm));
119       PetscCall(VecDuplicate(input, &output));
120       if (view) PetscCall(VecView(input, PETSC_VIEWER_STDOUT_WORLD));
121 
122       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
123          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
124          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
125          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
126          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
127       PetscCall(VecScatterPetscToFFTW(A, input, x)); /* buggy for dim = 3, 4... */
128 
129       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
130       PetscCall(MatMult(A, x, y));
131       if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
132       PetscCall(MatMultTranspose(A, y, z));
133 
134       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
135          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
136          the extra spaces that were artificially padded to perform real parallel transform.    */
137       PetscCall(VecScatterFFTWToPetsc(A, z, output));
138 
139       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
140       a = 1.0 / (PetscReal)N;
141       PetscCall(VecScale(output, a));
142       if (view) PetscCall(VecView(output, PETSC_VIEWER_STDOUT_WORLD));
143       PetscCall(VecAXPY(output, -1.0, input));
144       PetscCall(VecNorm(output, NORM_1, &enorm));
145       if (enorm > 1.e-09 && rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Error norm of |x - z| %e\n", enorm));
146 
147       /* Free spaces */
148       PetscCall(PetscFree(dim));
149       PetscCall(VecDestroy(&input));
150       PetscCall(VecDestroy(&output));
151       PetscCall(VecDestroy(&x));
152       PetscCall(VecDestroy(&y));
153       PetscCall(VecDestroy(&z));
154       PetscCall(MatDestroy(&A));
155     }
156   }
157   PetscCall(PetscRandomDestroy(&rdm));
158   PetscCall(PetscFinalize());
159   return 0;
160 }
161 
162 /*TEST
163 
164    build:
165       requires: !mpiuni fftw !complex
166 
167    test:
168       output_file: output/ex158.out
169 
170    test:
171       suffix: 2
172       nsize: 3
173 
174 TEST*/
175