1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
2
3 /*
4 Usage:
5 mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6 mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7 */
8
9 #include <petscmat.h>
10 #include <fftw3-mpi.h>
11
main(int argc,char ** args)12 int main(int argc, char **args)
13 {
14 PetscMPIInt rank, size;
15 PetscInt N0 = 50, N1 = 20, N = N0 * N1;
16 PetscRandom rdm;
17 PetscScalar a;
18 PetscReal enorm;
19 Vec x, y, z;
20 PetscBool view = PETSC_FALSE, use_interface = PETSC_TRUE;
21
22 PetscFunctionBeginUser;
23 PetscCall(PetscInitialize(&argc, &args, NULL, help));
24 #if defined(PETSC_USE_COMPLEX)
25 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
26 #endif
27
28 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");
29 PetscCall(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158", use_interface, &use_interface, NULL));
30 PetscOptionsEnd();
31
32 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
33 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
34
35 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
36 PetscCall(PetscRandomSetFromOptions(rdm));
37
38 if (!use_interface) {
39 /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
40 /*---------------------------------------------------------*/
41 fftw_plan fplan, bplan;
42 fftw_complex *data_in, *data_out, *data_out2;
43 ptrdiff_t alloc_local, local_n0, local_0_start;
44
45 if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n");
46 fftw_mpi_init();
47 N = N0 * N1;
48 alloc_local = fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start);
49
50 data_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
51 data_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
52 data_out2 = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
53
54 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_in, &x));
55 PetscCall(PetscObjectSetName((PetscObject)x, "Real Space vector"));
56 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out, &y));
57 PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
58 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out2, &z));
59 PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
60
61 fplan = fftw_mpi_plan_dft_2d(N0, N1, data_in, data_out, PETSC_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE);
62 bplan = fftw_mpi_plan_dft_2d(N0, N1, data_out, data_out2, PETSC_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
63
64 PetscCall(VecSetRandom(x, rdm));
65 if (view) PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
66
67 fftw_execute(fplan);
68 if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
69
70 fftw_execute(bplan);
71
72 /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
73 a = 1.0 / (PetscReal)N;
74 PetscCall(VecScale(z, a));
75 if (view) PetscCall(VecView(z, PETSC_VIEWER_STDOUT_WORLD));
76 PetscCall(VecAXPY(z, -1.0, x));
77 PetscCall(VecNorm(z, NORM_1, &enorm));
78 if (enorm > 1.e-11) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Error norm of |x - z| %g\n", (double)enorm));
79
80 /* Free spaces */
81 fftw_destroy_plan(fplan);
82 fftw_destroy_plan(bplan);
83 fftw_free(data_in);
84 PetscCall(VecDestroy(&x));
85 fftw_free(data_out);
86 PetscCall(VecDestroy(&y));
87 fftw_free(data_out2);
88 PetscCall(VecDestroy(&z));
89
90 } else {
91 /* Use PETSc-FFTW interface */
92 /*-------------------------------------------*/
93 PetscInt i, *dim, k, DIM;
94 Mat A;
95 Vec input, output;
96
97 N = 30;
98 for (i = 2; i < 3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
99 DIM = i;
100 PetscCall(PetscMalloc1(i, &dim));
101 for (k = 0; k < i; k++) dim[k] = 30;
102 N *= dim[i - 1];
103
104 /* Create FFTW object */
105 if (rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Use PETSc-FFTW interface...%d-DIM:%d \n", DIM, N));
106 PetscCall(MatCreateFFT(PETSC_COMM_WORLD, DIM, dim, MATFFTW, &A));
107
108 /* Create FFTW vectors that are compatible with parallel layout of A */
109 PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));
110 PetscCall(PetscObjectSetName((PetscObject)x, "Real space vector"));
111 PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
112 PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
113
114 /* Create and set PETSc vector */
115 PetscCall(VecCreate(PETSC_COMM_WORLD, &input));
116 PetscCall(VecSetSizes(input, PETSC_DECIDE, N));
117 PetscCall(VecSetFromOptions(input));
118 PetscCall(VecSetRandom(input, rdm));
119 PetscCall(VecDuplicate(input, &output));
120 if (view) PetscCall(VecView(input, PETSC_VIEWER_STDOUT_WORLD));
121
122 /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
123 can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
124 data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
125 layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
126 at the end of last dimension. This padding is required by FFTW to perform parallel real D.F.T. */
127 PetscCall(VecScatterPetscToFFTW(A, input, x)); /* buggy for dim = 3, 4... */
128
129 /* Apply FFTW_FORWARD and FFTW_BACKWARD */
130 PetscCall(MatMult(A, x, y));
131 if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
132 PetscCall(MatMultTranspose(A, y, z));
133
134 /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
135 performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
136 the extra spaces that were artificially padded to perform real parallel transform. */
137 PetscCall(VecScatterFFTWToPetsc(A, z, output));
138
139 /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
140 a = 1.0 / (PetscReal)N;
141 PetscCall(VecScale(output, a));
142 if (view) PetscCall(VecView(output, PETSC_VIEWER_STDOUT_WORLD));
143 PetscCall(VecAXPY(output, -1.0, input));
144 PetscCall(VecNorm(output, NORM_1, &enorm));
145 if (enorm > 1.e-09 && rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Error norm of |x - z| %e\n", enorm));
146
147 /* Free spaces */
148 PetscCall(PetscFree(dim));
149 PetscCall(VecDestroy(&input));
150 PetscCall(VecDestroy(&output));
151 PetscCall(VecDestroy(&x));
152 PetscCall(VecDestroy(&y));
153 PetscCall(VecDestroy(&z));
154 PetscCall(MatDestroy(&A));
155 }
156 }
157 PetscCall(PetscRandomDestroy(&rdm));
158 PetscCall(PetscFinalize());
159 return 0;
160 }
161
162 /*TEST
163
164 build:
165 requires: !mpiuni fftw !complex
166
167 test:
168 output_file: output/ex158.out
169
170 test:
171 suffix: 2
172 nsize: 3
173
174 TEST*/
175