xref: /petsc/src/mat/tests/ex158.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
2 
3 /*
4  Usage:
5    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7 */
8 
9 #include <petscmat.h>
10 #include <fftw3-mpi.h>
11 
12 int main(int argc,char **args)
13 {
14   PetscErrorCode ierr;
15   PetscMPIInt    rank,size;
16   PetscInt       N0=50,N1=20,N=N0*N1;
17   PetscRandom    rdm;
18   PetscScalar    a;
19   PetscReal      enorm;
20   Vec            x,y,z;
21   PetscBool      view=PETSC_FALSE,use_interface=PETSC_TRUE;
22 
23   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
24 #if defined(PETSC_USE_COMPLEX)
25   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
26 #endif
27 
28   ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");CHKERRQ(ierr);
29   CHKERRQ(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158",use_interface, &use_interface, NULL));
30   ierr = PetscOptionsEnd();CHKERRQ(ierr);
31 
32   CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
33   CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
34 
35   CHKERRQ(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
36   CHKERRQ(PetscRandomSetFromOptions(rdm));
37 
38   if (!use_interface) {
39     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
40     /*---------------------------------------------------------*/
41     fftw_plan    fplan,bplan;
42     fftw_complex *data_in,*data_out,*data_out2;
43     ptrdiff_t    alloc_local,local_n0,local_0_start;
44 
45     if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n");
46     fftw_mpi_init();
47     N           = N0*N1;
48     alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start);
49 
50     data_in   = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
51     data_out  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52     data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
53 
54     CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x));
55     CHKERRQ(PetscObjectSetName((PetscObject) x, "Real Space vector"));
56     CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y));
57     CHKERRQ(PetscObjectSetName((PetscObject) y, "Frequency space vector"));
58     CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z));
59     CHKERRQ(PetscObjectSetName((PetscObject) z, "Reconstructed vector"));
60 
61     fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
62     bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
63 
64     CHKERRQ(VecSetRandom(x, rdm));
65     if (view) CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
66 
67     fftw_execute(fplan);
68     if (view) CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));
69 
70     fftw_execute(bplan);
71 
72     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
73     a    = 1.0/(PetscReal)N;
74     CHKERRQ(VecScale(z,a));
75     if (view) CHKERRQ(VecView(z, PETSC_VIEWER_STDOUT_WORLD));
76     CHKERRQ(VecAXPY(z,-1.0,x));
77     CHKERRQ(VecNorm(z,NORM_1,&enorm));
78     if (enorm > 1.e-11) {
79       CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %g\n",(double)enorm));
80     }
81 
82     /* Free spaces */
83     fftw_destroy_plan(fplan);
84     fftw_destroy_plan(bplan);
85     fftw_free(data_in);  CHKERRQ(VecDestroy(&x));
86     fftw_free(data_out); CHKERRQ(VecDestroy(&y));
87     fftw_free(data_out2);CHKERRQ(VecDestroy(&z));
88 
89   } else {
90     /* Use PETSc-FFTW interface                  */
91     /*-------------------------------------------*/
92     PetscInt i,*dim,k,DIM;
93     Mat      A;
94     Vec      input,output;
95 
96     N=30;
97     for (i=2; i<3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
98       DIM  = i;
99       CHKERRQ(PetscMalloc1(i,&dim));
100       for (k=0; k<i; k++) {
101         dim[k]=30;
102       }
103       N *= dim[i-1];
104 
105       /* Create FFTW object */
106       if (rank == 0) {
107         CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N));
108       }
109       CHKERRQ(MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A));
110 
111       /* Create FFTW vectors that are compatible with parallel layout of A */
112       CHKERRQ(MatCreateVecsFFTW(A,&x,&y,&z));
113       CHKERRQ(PetscObjectSetName((PetscObject) x, "Real space vector"));
114       CHKERRQ(PetscObjectSetName((PetscObject) y, "Frequency space vector"));
115       CHKERRQ(PetscObjectSetName((PetscObject) z, "Reconstructed vector"));
116 
117       /* Create and set PETSc vector */
118       CHKERRQ(VecCreate(PETSC_COMM_WORLD,&input));
119       CHKERRQ(VecSetSizes(input,PETSC_DECIDE,N));
120       CHKERRQ(VecSetFromOptions(input));
121       CHKERRQ(VecSetRandom(input,rdm));
122       CHKERRQ(VecDuplicate(input,&output));
123       if (view) CHKERRQ(VecView(input,PETSC_VIEWER_STDOUT_WORLD));
124 
125       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
126          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
127          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
128          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
129          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
130       CHKERRQ(VecScatterPetscToFFTW(A,input,x));/* buggy for dim = 3, 4... */
131 
132       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
133       CHKERRQ(MatMult(A,x,y));
134       if (view) CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));
135       CHKERRQ(MatMultTranspose(A,y,z));
136 
137       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
138          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
139          the extra spaces that were artificially padded to perform real parallel transform.    */
140       CHKERRQ(VecScatterFFTWToPetsc(A,z,output));
141 
142       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
143       a    = 1.0/(PetscReal)N;
144       CHKERRQ(VecScale(output,a));
145       if (view) CHKERRQ(VecView(output,PETSC_VIEWER_STDOUT_WORLD));
146       CHKERRQ(VecAXPY(output,-1.0,input));
147       CHKERRQ(VecNorm(output,NORM_1,&enorm));
148       if (enorm > 1.e-09 && rank == 0) {
149         CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %e\n",enorm));
150       }
151 
152       /* Free spaces */
153       CHKERRQ(PetscFree(dim));
154       CHKERRQ(VecDestroy(&input));
155       CHKERRQ(VecDestroy(&output));
156       CHKERRQ(VecDestroy(&x));
157       CHKERRQ(VecDestroy(&y));
158       CHKERRQ(VecDestroy(&z));
159       CHKERRQ(MatDestroy(&A));
160     }
161   }
162   CHKERRQ(PetscRandomDestroy(&rdm));
163   ierr = PetscFinalize();
164   return ierr;
165 }
166 
167 /*TEST
168 
169    build:
170       requires: !mpiuni fftw !complex
171 
172    test:
173       output_file: output/ex158.out
174 
175    test:
176       suffix: 2
177       nsize: 3
178 
179 TEST*/
180