xref: /petsc/src/mat/tests/ex158.c (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
2 
3 /*
4  Usage:
5    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7 */
8 
9 #include <petscmat.h>
10 #include <fftw3-mpi.h>
11 
12 int main(int argc, char **args) {
13   PetscMPIInt rank, size;
14   PetscInt    N0 = 50, N1 = 20, N = N0 * N1;
15   PetscRandom rdm;
16   PetscScalar a;
17   PetscReal   enorm;
18   Vec         x, y, z;
19   PetscBool   view = PETSC_FALSE, use_interface = PETSC_TRUE;
20 
21   PetscFunctionBeginUser;
22   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
23 #if defined(PETSC_USE_COMPLEX)
24   SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
25 #endif
26 
27   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");
28   PetscCall(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158", use_interface, &use_interface, NULL));
29   PetscOptionsEnd();
30 
31   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
32   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
33 
34   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
35   PetscCall(PetscRandomSetFromOptions(rdm));
36 
37   if (!use_interface) {
38     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
39     /*---------------------------------------------------------*/
40     fftw_plan     fplan, bplan;
41     fftw_complex *data_in, *data_out, *data_out2;
42     ptrdiff_t     alloc_local, local_n0, local_0_start;
43 
44     if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n");
45     fftw_mpi_init();
46     N           = N0 * N1;
47     alloc_local = fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start);
48 
49     data_in   = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
50     data_out  = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
51     data_out2 = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
52 
53     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_in, &x));
54     PetscCall(PetscObjectSetName((PetscObject)x, "Real Space vector"));
55     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out, &y));
56     PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
57     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out2, &z));
58     PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
59 
60     fplan = fftw_mpi_plan_dft_2d(N0, N1, data_in, data_out, PETSC_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE);
61     bplan = fftw_mpi_plan_dft_2d(N0, N1, data_out, data_out2, PETSC_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
62 
63     PetscCall(VecSetRandom(x, rdm));
64     if (view) PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
65 
66     fftw_execute(fplan);
67     if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
68 
69     fftw_execute(bplan);
70 
71     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
72     a = 1.0 / (PetscReal)N;
73     PetscCall(VecScale(z, a));
74     if (view) PetscCall(VecView(z, PETSC_VIEWER_STDOUT_WORLD));
75     PetscCall(VecAXPY(z, -1.0, x));
76     PetscCall(VecNorm(z, NORM_1, &enorm));
77     if (enorm > 1.e-11) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Error norm of |x - z| %g\n", (double)enorm)); }
78 
79     /* Free spaces */
80     fftw_destroy_plan(fplan);
81     fftw_destroy_plan(bplan);
82     fftw_free(data_in);
83     PetscCall(VecDestroy(&x));
84     fftw_free(data_out);
85     PetscCall(VecDestroy(&y));
86     fftw_free(data_out2);
87     PetscCall(VecDestroy(&z));
88 
89   } else {
90     /* Use PETSc-FFTW interface                  */
91     /*-------------------------------------------*/
92     PetscInt i, *dim, k, DIM;
93     Mat      A;
94     Vec      input, output;
95 
96     N = 30;
97     for (i = 2; i < 3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
98       DIM = i;
99       PetscCall(PetscMalloc1(i, &dim));
100       for (k = 0; k < i; k++) { dim[k] = 30; }
101       N *= dim[i - 1];
102 
103       /* Create FFTW object */
104       if (rank == 0) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "Use PETSc-FFTW interface...%d-DIM:%d \n", DIM, N)); }
105       PetscCall(MatCreateFFT(PETSC_COMM_WORLD, DIM, dim, MATFFTW, &A));
106 
107       /* Create FFTW vectors that are compatible with parallel layout of A */
108       PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));
109       PetscCall(PetscObjectSetName((PetscObject)x, "Real space vector"));
110       PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
111       PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
112 
113       /* Create and set PETSc vector */
114       PetscCall(VecCreate(PETSC_COMM_WORLD, &input));
115       PetscCall(VecSetSizes(input, PETSC_DECIDE, N));
116       PetscCall(VecSetFromOptions(input));
117       PetscCall(VecSetRandom(input, rdm));
118       PetscCall(VecDuplicate(input, &output));
119       if (view) PetscCall(VecView(input, PETSC_VIEWER_STDOUT_WORLD));
120 
121       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
122          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
123          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
124          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
125          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
126       PetscCall(VecScatterPetscToFFTW(A, input, x)); /* buggy for dim = 3, 4... */
127 
128       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
129       PetscCall(MatMult(A, x, y));
130       if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
131       PetscCall(MatMultTranspose(A, y, z));
132 
133       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
134          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
135          the extra spaces that were artificially padded to perform real parallel transform.    */
136       PetscCall(VecScatterFFTWToPetsc(A, z, output));
137 
138       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
139       a = 1.0 / (PetscReal)N;
140       PetscCall(VecScale(output, a));
141       if (view) PetscCall(VecView(output, PETSC_VIEWER_STDOUT_WORLD));
142       PetscCall(VecAXPY(output, -1.0, input));
143       PetscCall(VecNorm(output, NORM_1, &enorm));
144       if (enorm > 1.e-09 && rank == 0) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Error norm of |x - z| %e\n", enorm)); }
145 
146       /* Free spaces */
147       PetscCall(PetscFree(dim));
148       PetscCall(VecDestroy(&input));
149       PetscCall(VecDestroy(&output));
150       PetscCall(VecDestroy(&x));
151       PetscCall(VecDestroy(&y));
152       PetscCall(VecDestroy(&z));
153       PetscCall(MatDestroy(&A));
154     }
155   }
156   PetscCall(PetscRandomDestroy(&rdm));
157   PetscCall(PetscFinalize());
158   return 0;
159 }
160 
161 /*TEST
162 
163    build:
164       requires: !mpiuni fftw !complex
165 
166    test:
167       output_file: output/ex158.out
168 
169    test:
170       suffix: 2
171       nsize: 3
172 
173 TEST*/
174