1 /*
2 Provides an interface to the FFTW package.
3 Testing examples can be found in ~src/mat/tests
4 */
5
6 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
7 EXTERN_C_BEGIN
8 #if !PetscDefined(HAVE_MPIUNI)
9 #include <fftw3-mpi.h>
10 #else
11 #include <fftw3.h>
12 #endif
13 EXTERN_C_END
14
/* Private data for MATFFTW matrices (shared by the sequential and MPI FFTW paths) */
typedef struct {
  PetscInt   ndim_fftw; /* number of transform dimensions, as handed to the FFTW MPI planners */
  ptrdiff_t *dim_fftw;  /* dimension sizes in ptrdiff_t, the width the FFTW MPI interface expects */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims; /* per-dimension size/stride descriptors for the 64-bit guru planner */
#else
  fftw_iodim *iodims; /* per-dimension size/stride descriptors for the guru planner */
#endif
  PetscInt  partial_dim;           /* NOTE(review): presumably a product of the trailing dimensions; it is set outside this chunk - confirm */
  fftw_plan p_forward, p_backward; /* forward/backward plans, created lazily on first MatMult/MatMultTranspose and cached */
  unsigned  p_flag;                /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
} Mat_FFTW;
28
29 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
30
/*
  MatMult_SeqFFTW - Forward FFT of x into y on a single process.

  The FFTW plan is created lazily on the first call and cached in fftw->p_forward.
  With planner flags other than FFTW_ESTIMATE, planning overwrites the arrays it is
  given (FFTW manual sec2.1/sec4), so planning is done against a scratch duplicate of
  x and the transform then runs on the real arrays via the new-array execute calls.
  With FFTW_ESTIMATE the plan is built directly on x/y and those pointers are
  remembered in finarray/foutarray so later calls with the same arrays can use plain
  fftw_execute().
*/
static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  Vec                xx; /* scratch input, used only while planning with a destructive (non-ESTIMATE) flag */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      /* build row-major iodim descriptors: innermost dimension gets stride 1, each
         outer stride is the product of the sizes of all dimensions inside it */
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      /* same row-major stride construction with the 32-bit iodim type */
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#endif

#else
      /* NOTE(review): casting dim (PetscInt *) to int * is only valid when PetscInt is int;
         with PETSC_USE_64BIT_INDICES this real-scalar n-D path looks wrong - confirm */
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->finarray  = (PetscScalar *)x_array;
      fftw->foutarray = y_array;
    }
  }

  if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
    fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
    fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
  } else {
    fftw_execute(fftw->p_forward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
134
/*
  MatMultTranspose_SeqFFTW - Backward (inverse, unnormalized) FFT of x into y on a
  single process.

  Mirrors MatMult_SeqFFTW: the backward plan is created lazily and cached in
  fftw->p_backward. Destructive planner flags (anything but FFTW_ESTIMATE) plan
  against a scratch duplicate of x; FFTW_ESTIMATE plans directly on x/y and records
  the pointers in binarray/boutarray so identical-array calls can use fftw_execute().
  The guru iodims were filled in by the forward path (or at setup) and are reused here.
*/
static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  Vec                xx; /* scratch input, used only while planning with a destructive flag */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
#else
  fftw_iodim *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#endif
#else
      /* NOTE(review): casting dim (PetscInt *) to int * is only valid when PetscInt is int;
         with PETSC_USE_64BIT_INDICES this real-scalar n-D path looks wrong - confirm */
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->binarray  = (PetscScalar *)x_array;
      fftw->boutarray = y_array;
    }
  }
  if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
    fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
    fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
  } else {
    fftw_execute(fftw->p_backward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
218
219 #if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
/*
  MatMult_MPIFFTW - Forward FFT of x into y across the matrix's communicator.

  Same lazy plan-creation/caching scheme as the sequential path: destructive planner
  flags plan against a scratch duplicate of x; FFTW_ESTIMATE plans directly on x/y
  and records the pointers so identical-array calls can use fftw_execute().
  Parallel 1-D real transforms are unsupported by FFTW and rejected.
*/
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;
  Vec                xx; /* scratch input, used only while planning with a destructive flag */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      /* NOTE(review): the real (r2c) MPI planners below hardwire FFTW_ESTIMATE instead of
         honoring fftw->p_flag - presumably intentional (avoids destructive planning), confirm */
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->finarray  = (PetscScalar *)x_array;
      fftw->foutarray = y_array;
    }
  }
  if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
    /* NOTE(review): for r2c plans the matching new-array executor would be
       fftw_execute_dft_r2c; fftw_execute_dft is used unconditionally here - confirm the
       real-scalar path */
    fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
  } else {
    fftw_execute(fftw->p_forward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
290
static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
/*
  MatMultTranspose_MPIFFTW - Backward (inverse, unnormalized) FFT of x into y across
  the matrix's communicator.

  Mirrors MatMult_MPIFFTW with the backward/c2r planners and the binarray/boutarray
  pointer cache. Parallel 1-D real transforms are unsupported by FFTW and rejected.
*/
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;
  Vec                xx; /* scratch input, used only while planning with a destructive flag */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      /* NOTE(review): the real (c2r) MPI planners below hardwire FFTW_ESTIMATE instead of
         honoring fftw->p_flag - presumably intentional (avoids destructive planning), confirm */
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->binarray  = (PetscScalar *)x_array;
      fftw->boutarray = y_array;
    }
  }
  if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
    /* NOTE(review): for c2r plans the matching new-array executor would be
       fftw_execute_dft_c2r; fftw_execute_dft is used unconditionally here - confirm the
       real-scalar path */
    fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
  } else {
    fftw_execute(fftw->p_backward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
361 #endif
362
MatDestroy_FFTW(Mat A)363 static PetscErrorCode MatDestroy_FFTW(Mat A)
364 {
365 Mat_FFT *fft = (Mat_FFT *)A->data;
366 Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
367
368 PetscFunctionBegin;
369 fftw_destroy_plan(fftw->p_forward);
370 fftw_destroy_plan(fftw->p_backward);
371 if (fftw->iodims) free(fftw->iodims);
372 PetscCall(PetscFree(fftw->dim_fftw));
373 PetscCall(PetscFree(fft->data));
374 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
375 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
376 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
377 #if !PetscDefined(HAVE_MPIUNI)
378 fftw_mpi_cleanup();
379 #endif
380 PetscFunctionReturn(PETSC_SUCCESS);
381 }
382
383 #if !PetscDefined(HAVE_MPIUNI)
384 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
VecDestroy_MPIFFTW(Vec v)385 static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
386 {
387 PetscScalar *array;
388
389 PetscFunctionBegin;
390 PetscCall(VecGetArray(v, &array));
391 fftw_free((fftw_complex *)array);
392 PetscCall(VecRestoreArray(v, &array));
393 PetscCall(VecDestroy_MPI(v));
394 PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 #endif
397
398 #if !PetscDefined(HAVE_MPIUNI)
VecDuplicate_FFTW_fin(Vec fin,Vec * fin_new)399 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
400 {
401 Mat A;
402
403 PetscFunctionBegin;
404 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
405 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
406 PetscFunctionReturn(PETSC_SUCCESS);
407 }
408
VecDuplicate_FFTW_fout(Vec fout,Vec * fout_new)409 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
410 {
411 Mat A;
412
413 PetscFunctionBegin;
414 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
415 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
416 PetscFunctionReturn(PETSC_SUCCESS);
417 }
418
VecDuplicate_FFTW_bout(Vec bout,Vec * bout_new)419 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
420 {
421 Mat A;
422
423 PetscFunctionBegin;
424 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
425 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
426 PetscFunctionReturn(PETSC_SUCCESS);
427 }
428 #endif
429
430 /*@
431 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
432 parallel layout determined by `MATFFTW`
433
434 Collective
435
436 Input Parameter:
437 . A - the matrix
438
439 Output Parameters:
440 + x - (optional) input vector of forward FFTW
441 . y - (optional) output vector of forward FFTW
442 - z - (optional) output vector of backward FFTW
443
444 Options Database Key:
445 . -mat_fftw_plannerflags - set FFTW planner flags
446
447 Level: advanced
448
449 Notes:
450 The parallel layout of output of forward FFTW is always same as the input
451 of the backward FFTW. But parallel layout of the input vector of forward
452 FFTW might not be same as the output of backward FFTW.
453
454 We need to provide enough space while doing parallel real transform.
We need to pad extra zeros at the end of the last dimension. For this reason one needs to
invoke the fftw_mpi_local_size_transposed() routine. Remember that one has to change the
last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
depends on whether the last dimension is even or odd: if the last dimension is even one needs
to pad two zeros; if it is odd only one zero is needed.
460
461 Lastly one needs some scratch space at the end of data set in each process. alloc_local
462 figures out how much space is needed, i.e. it figures out the data+scratch space for
463 each processor and returns that.
464
465 Developer Notes:
466 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
467
468 .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
469 @*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on the matrix under "MatCreateVecsFFTW_C"
     (MatCreateVecsFFTW_FFTW for MATFFTW); PetscUseMethod errors if it is absent */
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
476
MatCreateVecsFFTW_FFTW(Mat A,Vec * fin,Vec * fout,Vec * bout)477 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
478 {
479 PetscMPIInt size, rank;
480 MPI_Comm comm;
481 Mat_FFT *fft = (Mat_FFT *)A->data;
482
483 PetscFunctionBegin;
484 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
485 PetscValidType(A, 1);
486 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
487
488 PetscCallMPI(MPI_Comm_size(comm, &size));
489 PetscCallMPI(MPI_Comm_rank(comm, &rank));
490 if (size == 1) { /* sequential case */
491 #if defined(PETSC_USE_COMPLEX)
492 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
493 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
494 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
495 #else
496 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
497 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
498 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
499 #endif
500 #if !PetscDefined(HAVE_MPIUNI)
501 } else { /* parallel cases */
502 Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
503 PetscInt ndim = fft->ndim, *dim = fft->dim;
504 ptrdiff_t alloc_local, local_n0, local_0_start;
505 ptrdiff_t local_n1;
506 fftw_complex *data_fout;
507 ptrdiff_t local_1_start;
508 PetscInt n1, N1;
509 #if defined(PETSC_USE_COMPLEX)
510 fftw_complex *data_fin, *data_bout;
511 #else
512 double *data_finr, *data_boutr;
513 ptrdiff_t temp;
514 #endif
515
516 switch (ndim) {
517 case 1:
518 #if !defined(PETSC_USE_COMPLEX)
519 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
520 #else
521 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
522 if (fin) {
523 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524 PetscCall(PetscIntCast(local_n0, &n1));
525 PetscCall(PetscIntCast(fft->N, &N1));
526 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin));
527 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
528 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
529 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
530 }
531 if (fout) {
532 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
533 PetscCall(PetscIntCast(local_n1, &n1));
534 PetscCall(PetscIntCast(fft->N, &N1));
535 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout));
536 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
537 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
538 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
539 }
540 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
541 if (bout) {
542 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
543 PetscCall(PetscIntCast(local_n1, &n1));
544 PetscCall(PetscIntCast(fft->N, &N1));
545 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout));
546 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
547 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
548 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
549 }
550 break;
551 #endif
552 case 2:
553 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
554 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
555 PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1));
556 PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1));
557 if (fin) {
558 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
559 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
560 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
561 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
562 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
563 }
564 if (fout) {
565 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
566 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
567 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
568 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
569 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
570 }
571 if (bout) {
572 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
573 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
574 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
575 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
576 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
577 }
578 #else
579 /* Get local size */
580 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
581 if (fin) {
582 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
584 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
585 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
586 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
587 }
588 if (fout) {
589 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
591 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
592 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
593 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
594 }
595 if (bout) {
596 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
597 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
598 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
599 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
600 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
601 }
602 #endif
603 break;
604 case 3:
605 #if !defined(PETSC_USE_COMPLEX)
606 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
607 PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1));
608 PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1));
609 if (fin) {
610 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
611 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
612 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
613 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
614 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
615 }
616 if (fout) {
617 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
618 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
619 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
620 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
621 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
622 }
623 if (bout) {
624 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
625 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
626 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
627 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
628 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
629 }
630 #else
631 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
632 if (fin) {
633 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
635 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
636 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
637 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
638 }
639 if (fout) {
640 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
642 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
643 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
644 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
645 }
646 if (bout) {
647 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
648 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
649 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
650 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
651 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
652 }
653 #endif
654 break;
655 default:
656 #if !defined(PETSC_USE_COMPLEX)
657 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
658 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
659 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
660 N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
661 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
662
663 if (fin) {
664 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
665 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
666 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
667 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
668 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
669 }
670 if (fout) {
671 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
672 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
673 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
674 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
675 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
676 }
677 if (bout) {
678 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
679 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
680 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
681 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
682 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
683 }
684 #else
685 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
686 if (fin) {
687 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
689 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
690 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
691 (*fin)->ops->destroy = VecDestroy_MPIFFTW;
692 }
693 if (fout) {
694 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
696 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
697 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
698 (*fout)->ops->destroy = VecDestroy_MPIFFTW;
699 }
700 if (bout) {
701 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
702 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
703 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
704 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
705 (*bout)->ops->destroy = VecDestroy_MPIFFTW;
706 }
707 #endif
708 break;
709 }
710 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
711 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
712 memory leaks. We void these pointers here to avoid accident uses.
713 */
714 if (fin) (*fin)->ops->replacearray = NULL;
715 if (fout) (*fout)->ops->replacearray = NULL;
716 if (bout) (*bout)->ops->replacearray = NULL;
717 #endif
718 }
719 PetscFunctionReturn(PETSC_SUCCESS);
720 }
721
722 /*@
723 VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
724
725 Collective
726
727 Input Parameters:
728 + A - FFTW matrix
729 - x - the PETSc vector
730
731 Output Parameter:
732 . y - the FFTW vector
733
734 Level: intermediate
735
736 Note:
For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
zeros. This routine does that job by a scattering operation.
740
741 .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
742 @*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A under "VecScatterPetscToFFTW_C"
     (VecScatterPetscToFFTW_FFTW below); PetscUseMethod errors if A does not provide it */
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
749
/*
  VecScatterPetscToFFTW_FFTW - MATFFTW implementation of VecScatterPetscToFFTW().

  Builds a VecScatter that copies the PETSc vector x into the FFTW-layout vector y
  and executes it once. In the parallel real case FFTW pads the last dimension to
  2*(dim[last]/2+1), so general index sets are built that skip the padding slots in
  y; in the complex case the data is contiguous and stride index sets suffice.
*/
static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  PetscInt    vsize, vsize1;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* low = first global index of y owned by this process (start of the local FFTW slab) */
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    /* sequential: identity scatter over all N entries */
    /* NOTE(review): vsize and vsize1 are fetched but never used — likely leftovers; confirm before removing */
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim, n1;
    ptrdiff_t local_n0, local_0_start; /* FFTW's local slab size/offset in the input (first) dimension */
    ptrdiff_t local_n1, local_1_start; /* FFTW's local slab size/offset in the transposed output dimension */
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM; /* padded length of the last dimension: 2*(dim[last]/2+1) */
    ptrdiff_t temp;
#else
    PetscInt nstart, nlow;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* 1D complex: FFTW reports the local input distribution; a pair of stride ISs maps
         the PETSc ownership range onto FFTW's slab */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscIntCast(local_n0, &n1));
      PetscCall(PetscIntCast(local_0_start, &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* 2D complex: contiguous rows, so simple strides of length local_n0*dim[1] */
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
      PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* 2D real: destination rows are padded to NM = 2*(dim[1]/2+1) entries, so build
         general index sets: indx1 = unpadded source indices, indx2 = padded target indices */
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j; /* position within the unpadded local block */
          tempindx1 = i * NM + j;     /* position within the padded local block */

          PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* 3D complex: same stride construction as 2D, over local_n0*dim[1]*dim[2] entries */
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
      PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      /* NOTE(review): the SETERRQ below returns immediately, so the remainder of this
         branch is unreachable — kept (per the note above) pending a proper fix */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k; /* unpadded index */
            tempindx1 = i * dim[1] * NM + j * NM + k;         /* padded index */

            PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
            PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3 complex: partial_dim = product of all but the first dimension */
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
      PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      /* NOTE(review): the SETERRQ below returns immediately, so the remainder of this
         branch is unreachable — kept (per the note above) pending a proper fix */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      /* temporarily shrink the last dimension to its r2c size for the size query, then restore */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      /* NM = number of padding slots appended after each last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM; /* skip the padding at each row end */
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
971
972 /*@
973 VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
974
975 Collective
976
977 Input Parameters:
978 + A - `MATFFTW` matrix
979 - x - FFTW vector
980
981 Output Parameter:
982 . y - PETSc vector
983
984 Level: intermediate
985
 Note:
 When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
 `VecScatterFFTWToPetsc()` removes those extra zeros.
989
990 .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
991 @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A under "VecScatterFFTWToPetsc_C"
     (VecScatterFFTWToPetsc_FFTW below); PetscUseMethod errors if A does not provide it */
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
998
/*
  VecScatterFFTWToPetsc_FFTW - MATFFTW implementation of VecScatterFFTWToPetsc().

  Inverse of VecScatterPetscToFFTW_FFTW(): copies the FFTW-layout vector x into
  the PETSc vector y, discarding the padding entries that FFTW inserts at the end
  of the last dimension in the parallel real case.
*/
static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* low = first global index of x (the FFTW vector) owned by this process */
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    /* sequential: identity scatter over all N entries */
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start; /* FFTW local slab size/offset, input distribution */
    ptrdiff_t local_n1, local_1_start; /* FFTW local slab size/offset, transposed output distribution */
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM; /* padded length of the last dimension: 2*(dim[last]/2+1) */
    ptrdiff_t temp;
#else
    PetscInt nlow, nstart;
#endif
    PetscInt n1;
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* 1D complex backward: the output distribution is (local_n1, local_1_start) */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscIntCast(local_n1, &n1));
      PetscCall(PetscIntCast(local_1_start, &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* 2D complex: stride index sets; list2 (ownership-based) indexes x, list1 indexes y */
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
      PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* 2D real: source rows in x are padded to NM entries; indx2 selects the padded
         source positions and indx1 the unpadded destination positions */
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j; /* unpadded index */
          tempindx1 = i * NM + j;     /* padded index */

          PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
          PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
        }
      }

      PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* 3D complex: same stride construction over local_n0*dim[1]*dim[2] entries */
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
      PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* 3D real: padded source (x) -> unpadded destination (y) */
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k; /* unpadded index */
            tempindx1 = i * dim[1] * NM + j * NM + k;         /* padded index */

            PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
            PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
          }
        }
      }

      PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3 complex */
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
      PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* ndim > 3 real: temporarily shrink the last dimension to its r2c size for the
         size query, then restore */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      /* NM = number of padding slots appended after each last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i]));
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM; /* skip the padding at each row end */
        j++;
      }
      PetscCall(PetscIntCast(local_n0 * partial_dim, &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1214
1215 /*MC
1216 MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1217
1218 Level: intermediate
1219
1220 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1221 M*/
MatCreate_FFTW(Mat A)1222 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1223 {
1224 MPI_Comm comm;
1225 Mat_FFT *fft = (Mat_FFT *)A->data;
1226 Mat_FFTW *fftw;
1227 PetscInt ndim = fft->ndim, *dim = fft->dim;
1228 const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1229 unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1230 PetscBool flg;
1231 PetscInt p_flag, partial_dim = 1, ctr;
1232 PetscMPIInt size, rank;
1233 ptrdiff_t *pdim;
1234 #if !defined(PETSC_USE_COMPLEX)
1235 PetscInt tot_dim;
1236 #endif
1237
1238 PetscFunctionBegin;
1239 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1240 PetscCallMPI(MPI_Comm_size(comm, &size));
1241 PetscCallMPI(MPI_Comm_rank(comm, &rank));
1242
1243 #if !PetscDefined(HAVE_MPIUNI)
1244 fftw_mpi_init();
1245 #endif
1246 pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1247 pdim[0] = dim[0];
1248 #if !defined(PETSC_USE_COMPLEX)
1249 tot_dim = 2 * dim[0];
1250 #endif
1251 for (ctr = 1; ctr < ndim; ctr++) {
1252 partial_dim *= dim[ctr];
1253 pdim[ctr] = dim[ctr];
1254 #if !defined(PETSC_USE_COMPLEX)
1255 if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1256 else tot_dim *= dim[ctr];
1257 #endif
1258 }
1259
1260 if (size == 1) {
1261 #if defined(PETSC_USE_COMPLEX)
1262 PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1263 fft->n = fft->N;
1264 #else
1265 PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1266 fft->n = tot_dim;
1267 #endif
1268 #if !PetscDefined(HAVE_MPIUNI)
1269 } else {
1270 ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1271 #if !defined(PETSC_USE_COMPLEX)
1272 ptrdiff_t temp;
1273 PetscInt N1;
1274 #else
1275 PetscInt n1;
1276 #endif
1277
1278 switch (ndim) {
1279 case 1:
1280 #if !defined(PETSC_USE_COMPLEX)
1281 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1282 #else
1283 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1284 PetscCall(PetscIntCast(local_n0, &fft->n));
1285 PetscCall(PetscIntCast(local_n1, &n1));
1286 PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N));
1287 #endif
1288 break;
1289 case 2:
1290 #if defined(PETSC_USE_COMPLEX)
1291 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1292 fft->n = (PetscInt)local_n0 * dim[1];
1293 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1294 #else
1295 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1296
1297 fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1298 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1299 #endif
1300 break;
1301 case 3:
1302 #if defined(PETSC_USE_COMPLEX)
1303 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1304
1305 fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1306 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1307 #else
1308 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1309
1310 fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1311 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1312 #endif
1313 break;
1314 default:
1315 #if defined(PETSC_USE_COMPLEX)
1316 fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1317
1318 PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n));
1319 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1320 #else
1321 temp = pdim[ndim - 1];
1322
1323 pdim[ndim - 1] = temp / 2 + 1;
1324
1325 fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1326
1327 PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n));
1328 N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1329
1330 pdim[ndim - 1] = temp;
1331
1332 PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1333 #endif
1334 break;
1335 }
1336 #endif
1337 }
1338 free(pdim);
1339 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1340 PetscCall(PetscNew(&fftw));
1341 fft->data = (void *)fftw;
1342
1343 fftw->ndim_fftw = ndim; /* This is dimension of fft */
1344 fftw->partial_dim = partial_dim;
1345
1346 PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1347 if (size == 1) {
1348 #if defined(PETSC_USE_64BIT_INDICES)
1349 fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1350 #else
1351 fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1352 #endif
1353 }
1354
1355 for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1356
1357 fftw->p_forward = NULL;
1358 fftw->p_backward = NULL;
1359 fftw->p_flag = FFTW_ESTIMATE;
1360
1361 if (size == 1) {
1362 A->ops->mult = MatMult_SeqFFTW;
1363 A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1364 #if !PetscDefined(HAVE_MPIUNI)
1365 } else {
1366 A->ops->mult = MatMult_MPIFFTW;
1367 A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1368 #endif
1369 }
1370 fft->matdestroy = MatDestroy_FFTW;
1371 A->assembled = PETSC_TRUE;
1372 A->preallocated = PETSC_TRUE;
1373
1374 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1375 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1376 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1377
1378 /* get runtime options */
1379 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1380 PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1381 if (flg) fftw->p_flag = iplans[p_flag];
1382 PetscOptionsEnd();
1383 PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385