1 /* This program illustrates use of parallel real FFT */
2 static char help[] = "This program illustrates the use of parallel real 2D fft using fftw without PETSc interface";
3
4 #include <petscmat.h>
5 #include <fftw3.h>
6 #include <fftw3-mpi.h>
7
main(int argc,char ** args)8 int main(int argc, char **args)
9 {
10 const ptrdiff_t N0 = 2056, N1 = 2056;
11 fftw_plan bplan, fplan;
12 fftw_complex *out;
13 double *in1, *in2;
14 ptrdiff_t alloc_local, local_n0, local_0_start;
15 ptrdiff_t local_n1, local_1_start;
16 PetscInt i, j;
17 PetscMPIInt size, rank;
18 int n, N, N_factor, NM;
19 PetscScalar one = 2.0, zero = 0.5;
20 PetscScalar two = 4.0, three = 8.0, four = 16.0;
21 PetscScalar a, *x_arr, *y_arr, *z_arr;
22 PetscReal enorm;
23 Vec fin, fout, fout1;
24 Vec ini, final;
25 PetscRandom rnd;
26 PetscInt *indx3, tempindx, low, *indx4, tempindx1;
27
28 PetscFunctionBeginUser;
29 PetscCall(PetscInitialize(&argc, &args, NULL, help));
30 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
31 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
32
33 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rnd));
34
35 alloc_local = fftw_mpi_local_size_2d_transposed(N0, N1 / 2 + 1, PETSC_COMM_WORLD, &local_n0, &local_0_start, &local_n1, &local_1_start);
36 #if defined(DEBUGGING)
37 printf("The value alloc_local is %ld from process %d\n", alloc_local, rank);
38 printf("The value local_n0 is %ld from process %d\n", local_n0, rank);
39 printf("The value local_0_start is %ld from process %d\n", local_0_start, rank);
40 /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */
41 /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */
42 /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */
43 #endif
44
45 /* Allocate space for input and output arrays */
46 in1 = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
47 in2 = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
48 out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
49
50 N = 2 * N0 * (N1 / 2 + 1);
51 N_factor = N0 * N1;
52 n = 2 * local_n0 * (N1 / 2 + 1);
53
54 /* printf("The value N is %d from process %d\n",N,rank); */
55 /* printf("The value n is %d from process %d\n",n,rank); */
56 /* printf("The value n1 is %d from process %d\n",n1,rank);*/
57 /* Creating data vector and accompanying array with VeccreateMPIWithArray */
58 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)in1, &fin));
59 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)out, &fout));
60 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)in2, &fout1));
61
62 /* Set the vector with random data */
63 PetscCall(VecSet(fin, zero));
64 /* for (i=0;i<N0*N1;i++) */
65 /* { */
66 /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */
67 /* } */
68
69 /* VecSet(fin,one); */
70 i = 0;
71 PetscCall(VecSetValues(fin, 1, &i, &one, INSERT_VALUES));
72 i = 1;
73 PetscCall(VecSetValues(fin, 1, &i, &two, INSERT_VALUES));
74 i = 4;
75 PetscCall(VecSetValues(fin, 1, &i, &three, INSERT_VALUES));
76 i = 5;
77 PetscCall(VecSetValues(fin, 1, &i, &four, INSERT_VALUES));
78 PetscCall(VecAssemblyBegin(fin));
79 PetscCall(VecAssemblyEnd(fin));
80
81 PetscCall(VecSet(fout, zero));
82 PetscCall(VecSet(fout1, zero));
83
84 /* Get the meaningful portion of array */
85 PetscCall(VecGetArray(fin, &x_arr));
86 PetscCall(VecGetArray(fout1, &z_arr));
87 PetscCall(VecGetArray(fout, &y_arr));
88
89 fplan = fftw_mpi_plan_dft_r2c_2d(N0, N1, (double *)x_arr, (fftw_complex *)y_arr, PETSC_COMM_WORLD, FFTW_ESTIMATE);
90 bplan = fftw_mpi_plan_dft_c2r_2d(N0, N1, (fftw_complex *)y_arr, (double *)z_arr, PETSC_COMM_WORLD, FFTW_ESTIMATE);
91
92 fftw_execute(fplan);
93 fftw_execute(bplan);
94
95 PetscCall(VecRestoreArray(fin, &x_arr));
96 PetscCall(VecRestoreArray(fout1, &z_arr));
97 PetscCall(VecRestoreArray(fout, &y_arr));
98
99 /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */
100 PetscCall(VecCreate(PETSC_COMM_WORLD, &ini));
101 PetscCall(VecCreate(PETSC_COMM_WORLD, &final));
102 PetscCall(VecSetSizes(ini, local_n0 * N1, N0 * N1));
103 PetscCall(VecSetSizes(final, local_n0 * N1, N0 * N1));
104 PetscCall(VecSetFromOptions(ini));
105 PetscCall(VecSetFromOptions(final));
106
107 if (N1 % 2 == 0) {
108 NM = N1 + 2;
109 } else {
110 NM = N1 + 1;
111 }
112 /*printf("The Value of NM is %d",NM); */
113 PetscCall(VecGetOwnershipRange(fin, &low, NULL));
114 /*printf("The local index is %d from %d\n",low,rank); */
115 PetscCall(PetscMalloc1(local_n0 * N1, &indx3));
116 PetscCall(PetscMalloc1(local_n0 * N1, &indx4));
117 for (i = 0; i < local_n0; i++) {
118 for (j = 0; j < N1; j++) {
119 tempindx = i * N1 + j;
120 tempindx1 = i * NM + j;
121
122 indx3[tempindx] = local_0_start * N1 + tempindx;
123 indx4[tempindx] = low + tempindx1;
124 /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */
125 /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */
126 }
127 }
128
129 PetscCall(PetscMalloc2(local_n0 * N1, &x_arr, local_n0 * N1, &y_arr)); /* arr must be allocated for VecGetValues() */
130 PetscCall(VecGetValues(fin, local_n0 * N1, indx4, (PetscScalar *)x_arr));
131 PetscCall(VecSetValues(ini, local_n0 * N1, indx3, x_arr, INSERT_VALUES));
132
133 PetscCall(VecAssemblyBegin(ini));
134 PetscCall(VecAssemblyEnd(ini));
135
136 PetscCall(VecGetValues(fout1, local_n0 * N1, indx4, y_arr));
137 PetscCall(VecSetValues(final, local_n0 * N1, indx3, y_arr, INSERT_VALUES));
138 PetscCall(VecAssemblyBegin(final));
139 PetscCall(VecAssemblyEnd(final));
140 PetscCall(PetscFree2(x_arr, y_arr));
141
142 /*
143 VecScatter vecscat;
144 IS indx1,indx2;
145 for (i=0;i<N0;i++) {
146 indx = i*NM;
147 ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1);
148 indx = i*N1;
149 ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2);
150 VecScatterCreate(fin,indx1,ini,indx2,&vecscat);
151 VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD);
152 VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD);
153 VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD);
154 VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD);
155 }
156 */
157
158 a = 1.0 / (PetscReal)N_factor;
159 PetscCall(VecScale(fout1, a));
160 PetscCall(VecScale(final, a));
161
162 /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */
163 /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */
164 PetscCall(VecAXPY(final, -1.0, ini));
165
166 PetscCall(VecNorm(final, NORM_1, &enorm));
167 if (enorm > 1.e-10) PetscCall(PetscPrintf(PETSC_COMM_WORLD, " Error norm of |x - z| = %e\n", enorm));
168
169 /* Execute fftw with function fftw_execute and destroy it after execution */
170 fftw_destroy_plan(fplan);
171 fftw_destroy_plan(bplan);
172 fftw_free(in1);
173 PetscCall(VecDestroy(&fin));
174 fftw_free(out);
175 PetscCall(VecDestroy(&fout));
176 fftw_free(in2);
177 PetscCall(VecDestroy(&fout1));
178
179 PetscCall(VecDestroy(&ini));
180 PetscCall(VecDestroy(&final));
181
182 PetscCall(PetscRandomDestroy(&rnd));
183 PetscCall(PetscFree(indx3));
184 PetscCall(PetscFree(indx4));
185 PetscCall(PetscFinalize());
186 return 0;
187 }
188
189 /*TEST
190
191 build:
192 requires: !mpiuni fftw !complex
193
194 test:
195 output_file: output/empty.out
196
197 test:
198 suffix: 2
199 nsize: 3
200 output_file: output/empty.out
201
202 TEST*/
203