xref: /petsc/src/mat/tests/ex158.c (revision 6a98f8dc3f2c9149905a87dc2e9d0fedaf64e09a)
1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
2 
3 /*
4  Usage:
5    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7 */
8 
9 #include <petscmat.h>
10 #include <fftw3-mpi.h>
11 
12 int main(int argc,char **args)
13 {
14   PetscErrorCode ierr;
15   PetscMPIInt    rank,size;
16   PetscInt       N0=50,N1=20,N=N0*N1;
17   PetscRandom    rdm;
18   PetscScalar    a;
19   PetscReal      enorm;
20   Vec            x,y,z;
21   PetscBool      view=PETSC_FALSE,use_interface=PETSC_TRUE;
22 
23   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
24 #if defined(PETSC_USE_COMPLEX)
25   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
26 #endif
27 
28   ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");CHKERRQ(ierr);
29   ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158",use_interface, &use_interface, NULL);CHKERRQ(ierr);
30   ierr = PetscOptionsEnd();CHKERRQ(ierr);
31 
32   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
33   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
34 
35   ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr);
36   ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr);
37 
38   if (!use_interface) {
39     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
40     /*---------------------------------------------------------*/
41     fftw_plan    fplan,bplan;
42     fftw_complex *data_in,*data_out,*data_out2;
43     ptrdiff_t    alloc_local,local_n0,local_0_start;
44 
45     if (!rank) printf("Use FFTW without PETSc-FFTW interface\n");
46     fftw_mpi_init();
47     N           = N0*N1;
48     alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start);
49 
50     data_in   = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
51     data_out  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52     data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
53 
54     ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr);
55     ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr);
56     ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr);
57     ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
58     ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr);
59     ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);
60 
61     fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
62     bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
63 
64     ierr = VecSetRandom(x, rdm);CHKERRQ(ierr);
65     if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
66 
67     fftw_execute(fplan);
68     if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
69 
70     fftw_execute(bplan);
71 
72     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
73     a    = 1.0/(PetscReal)N;
74     ierr = VecScale(z,a);CHKERRQ(ierr);
75     if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
76     ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
77     ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr);
78     if (enorm > 1.e-11) {
79       ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr);
80     }
81 
82     /* Free spaces */
83     fftw_destroy_plan(fplan);
84     fftw_destroy_plan(bplan);
85     fftw_free(data_in);  ierr = VecDestroy(&x);CHKERRQ(ierr);
86     fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr);
87     fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr);
88 
89   } else {
90     /* Use PETSc-FFTW interface                  */
91     /*-------------------------------------------*/
92     PetscInt i,*dim,k,DIM;
93     Mat      A;
94     Vec      input,output;
95 
96     N=30;
97     for (i=2; i<3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
98       DIM  = i;
99       ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr);
100       for (k=0; k<i; k++) {
101         dim[k]=30;
102       }
103       N *= dim[i-1];
104 
105       /* Create FFTW object */
106       if (!rank) {
107         ierr = PetscPrintf(PETSC_COMM_SELF,"Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N);CHKERRQ(ierr);
108       }
109       ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr);
110 
111       /* Create FFTW vectors that are compatible with parallel layout of A */
112       ierr = MatCreateVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr);
113       ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr);
114       ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
115       ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);
116 
117       /* Create and set PETSc vector */
118       ierr = VecCreate(PETSC_COMM_WORLD,&input);CHKERRQ(ierr);
119       ierr = VecSetSizes(input,PETSC_DECIDE,N);CHKERRQ(ierr);
120       ierr = VecSetFromOptions(input);CHKERRQ(ierr);
121       ierr = VecSetRandom(input,rdm);CHKERRQ(ierr);
122       ierr = VecDuplicate(input,&output);CHKERRQ(ierr);
123       if (view) {ierr = VecView(input,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
124 
125       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
126          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
127          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
128          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
129          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
130       ierr = VecScatterPetscToFFTW(A,input,x);CHKERRQ(ierr);/* buggy for dim = 3, 4... */
131 
132       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
133       ierr = MatMult(A,x,y);CHKERRQ(ierr);
134       if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
135       ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);
136 
137       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
138          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
139          the extra spaces that were artificially padded to perform real parallel transform.    */
140       ierr = VecScatterFFTWToPetsc(A,z,output);CHKERRQ(ierr);
141 
142       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
143       a    = 1.0/(PetscReal)N;
144       ierr = VecScale(output,a);CHKERRQ(ierr);
145       if (view) {ierr = VecView(output,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
146       ierr = VecAXPY(output,-1.0,input);CHKERRQ(ierr);
147       ierr = VecNorm(output,NORM_1,&enorm);CHKERRQ(ierr);
148       if (enorm > 1.e-09 && !rank) {
149         ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr);
150       }
151 
152       /* Free spaces */
153       ierr = PetscFree(dim);CHKERRQ(ierr);
154       ierr = VecDestroy(&input);CHKERRQ(ierr);
155       ierr = VecDestroy(&output);CHKERRQ(ierr);
156       ierr = VecDestroy(&x);CHKERRQ(ierr);
157       ierr = VecDestroy(&y);CHKERRQ(ierr);
158       ierr = VecDestroy(&z);CHKERRQ(ierr);
159       ierr = MatDestroy(&A);CHKERRQ(ierr);
160     }
161   }
162   ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr);
163   ierr = PetscFinalize();
164   return ierr;
165 }
166 
167 
168 /*TEST
169 
170    build:
171       requires: fftw !complex
172 
173    test:
174       output_file: output/ex158.out
175 
176    test:
177       suffix: 2
178       nsize: 3
179 
180 TEST*/
181